LLVM  10.0.0
ARMISelLowering.cpp
Go to the documentation of this file.
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
26 #include "Utils/ARMBaseInfo.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/BitVector.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringExtras.h"
37 #include "llvm/ADT/StringRef.h"
38 #include "llvm/ADT/StringSwitch.h"
39 #include "llvm/ADT/Triple.h"
40 #include "llvm/ADT/Twine.h"
64 #include "llvm/IR/Attributes.h"
65 #include "llvm/IR/CallingConv.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/Constants.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Function.h"
72 #include "llvm/IR/GlobalAlias.h"
73 #include "llvm/IR/GlobalValue.h"
74 #include "llvm/IR/GlobalVariable.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InlineAsm.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
81 #include "llvm/IR/IntrinsicsARM.h"
82 #include "llvm/IR/Module.h"
83 #include "llvm/IR/PatternMatch.h"
84 #include "llvm/IR/Type.h"
85 #include "llvm/IR/User.h"
86 #include "llvm/IR/Value.h"
87 #include "llvm/MC/MCInstrDesc.h"
89 #include "llvm/MC/MCRegisterInfo.h"
90 #include "llvm/MC/MCSchedule.h"
93 #include "llvm/Support/Casting.h"
94 #include "llvm/Support/CodeGen.h"
96 #include "llvm/Support/Compiler.h"
97 #include "llvm/Support/Debug.h"
99 #include "llvm/Support/KnownBits.h"
101 #include "llvm/Support/MathExtras.h"
105 #include <algorithm>
106 #include <cassert>
107 #include <cstdint>
108 #include <cstdlib>
109 #include <iterator>
110 #include <limits>
111 #include <string>
112 #include <tuple>
113 #include <utility>
114 #include <vector>
115 
116 using namespace llvm;
117 using namespace llvm::PatternMatch;
118 
119 #define DEBUG_TYPE "arm-isel"
120 
121 STATISTIC(NumTailCalls, "Number of tail calls");
122 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
123 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
124 STATISTIC(NumConstpoolPromoted,
125  "Number of constants with their storage promoted into constant pools");
126 
127 static cl::opt<bool>
128 ARMInterworking("arm-interworking", cl::Hidden,
129  cl::desc("Enable / disable ARM interworking (for debugging only)"),
130  cl::init(true));
131 
133  "arm-promote-constant", cl::Hidden,
134  cl::desc("Enable / disable promotion of unnamed_addr constants into "
135  "constant pools"),
136  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
138  "arm-promote-constant-max-size", cl::Hidden,
139  cl::desc("Maximum size of constant to promote into a constant pool"),
140  cl::init(64));
142  "arm-promote-constant-max-total", cl::Hidden,
143  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
144  cl::init(128));
145 
146 static cl::opt<unsigned>
147 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
148  cl::desc("Maximum interleave factor for MVE VLDn to generate."),
149  cl::init(2));
150 
151 // The APCS parameter registers.
152 static const MCPhysReg GPRArgRegs[] = {
153  ARM::R0, ARM::R1, ARM::R2, ARM::R3
154 };
155 
156 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
157  MVT PromotedBitwiseVT) {
158  if (VT != PromotedLdStVT) {
159  setOperationAction(ISD::LOAD, VT, Promote);
160  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
161 
162  setOperationAction(ISD::STORE, VT, Promote);
163  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
164  }
165 
166  MVT ElemTy = VT.getVectorElementType();
167  if (ElemTy != MVT::f64)
168  setOperationAction(ISD::SETCC, VT, Custom);
169  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
170  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
171  if (ElemTy == MVT::i32) {
172  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
173  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
174  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
175  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
176  } else {
177  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
178  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
179  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
180  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
181  }
182  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
183  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
184  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
185  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
186  setOperationAction(ISD::SELECT, VT, Expand);
187  setOperationAction(ISD::SELECT_CC, VT, Expand);
188  setOperationAction(ISD::VSELECT, VT, Expand);
189  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
190  if (VT.isInteger()) {
191  setOperationAction(ISD::SHL, VT, Custom);
192  setOperationAction(ISD::SRA, VT, Custom);
193  setOperationAction(ISD::SRL, VT, Custom);
194  }
195 
196  // Promote all bit-wise operations.
197  if (VT.isInteger() && VT != PromotedBitwiseVT) {
198  setOperationAction(ISD::AND, VT, Promote);
199  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
200  setOperationAction(ISD::OR, VT, Promote);
201  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
202  setOperationAction(ISD::XOR, VT, Promote);
203  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
204  }
205 
206  // Neon does not support vector divide/remainder operations.
207  setOperationAction(ISD::SDIV, VT, Expand);
208  setOperationAction(ISD::UDIV, VT, Expand);
209  setOperationAction(ISD::FDIV, VT, Expand);
210  setOperationAction(ISD::SREM, VT, Expand);
211  setOperationAction(ISD::UREM, VT, Expand);
212  setOperationAction(ISD::FREM, VT, Expand);
213 
214  if (!VT.isFloatingPoint() &&
215  VT != MVT::v2i64 && VT != MVT::v1i64)
216  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
217  setOperationAction(Opcode, VT, Legal);
218  if (!VT.isFloatingPoint())
219  for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
220  setOperationAction(Opcode, VT, Legal);
221 }
222 
223 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
224  addRegisterClass(VT, &ARM::DPRRegClass);
225  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
226 }
227 
228 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
229  addRegisterClass(VT, &ARM::DPairRegClass);
230  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
231 }
232 
233 void ARMTargetLowering::setAllExpand(MVT VT) {
234  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
235  setOperationAction(Opc, VT, Expand);
236 
237  // We support these really simple operations even on types where all
238  // the actual arithmetic has to be broken down into simpler
239  // operations or turned into library calls.
240  setOperationAction(ISD::BITCAST, VT, Legal);
241  setOperationAction(ISD::LOAD, VT, Legal);
242  setOperationAction(ISD::STORE, VT, Legal);
243  setOperationAction(ISD::UNDEF, VT, Legal);
244 }
245 
246 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
247  LegalizeAction Action) {
248  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
249  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
250  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
251 }
252 
253 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
254  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
255 
256  for (auto VT : IntTypes) {
257  addRegisterClass(VT, &ARM::MQPRRegClass);
258  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
259  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
260  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
261  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
262  setOperationAction(ISD::SHL, VT, Custom);
263  setOperationAction(ISD::SRA, VT, Custom);
264  setOperationAction(ISD::SRL, VT, Custom);
265  setOperationAction(ISD::SMIN, VT, Legal);
266  setOperationAction(ISD::SMAX, VT, Legal);
267  setOperationAction(ISD::UMIN, VT, Legal);
268  setOperationAction(ISD::UMAX, VT, Legal);
269  setOperationAction(ISD::ABS, VT, Legal);
270  setOperationAction(ISD::SETCC, VT, Custom);
271  setOperationAction(ISD::MLOAD, VT, Custom);
272  setOperationAction(ISD::MSTORE, VT, Legal);
273  setOperationAction(ISD::CTLZ, VT, Legal);
274  setOperationAction(ISD::CTTZ, VT, Custom);
275  setOperationAction(ISD::BITREVERSE, VT, Legal);
276  setOperationAction(ISD::BSWAP, VT, Legal);
277  setOperationAction(ISD::SADDSAT, VT, Legal);
278  setOperationAction(ISD::UADDSAT, VT, Legal);
279  setOperationAction(ISD::SSUBSAT, VT, Legal);
280  setOperationAction(ISD::USUBSAT, VT, Legal);
281 
282  // No native support for these.
283  setOperationAction(ISD::UDIV, VT, Expand);
284  setOperationAction(ISD::SDIV, VT, Expand);
285  setOperationAction(ISD::UREM, VT, Expand);
286  setOperationAction(ISD::SREM, VT, Expand);
287  setOperationAction(ISD::CTPOP, VT, Expand);
288 
289  // Vector reductions
290  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
291  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
292  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
293  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
294  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
295 
296  if (!HasMVEFP) {
297  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
298  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
299  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
300  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
301  }
302 
303  // Pre and Post inc are supported on loads and stores
304  for (unsigned im = (unsigned)ISD::PRE_INC;
306  setIndexedLoadAction(im, VT, Legal);
307  setIndexedStoreAction(im, VT, Legal);
308  setIndexedMaskedLoadAction(im, VT, Legal);
309  setIndexedMaskedStoreAction(im, VT, Legal);
310  }
311  }
312 
313  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
314  for (auto VT : FloatTypes) {
315  addRegisterClass(VT, &ARM::MQPRRegClass);
316  if (!HasMVEFP)
317  setAllExpand(VT);
318 
319  // These are legal or custom whether we have MVE.fp or not
320  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
321  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
322  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
323  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
324  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
325  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
326  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
327  setOperationAction(ISD::SETCC, VT, Custom);
328  setOperationAction(ISD::MLOAD, VT, Custom);
329  setOperationAction(ISD::MSTORE, VT, Legal);
330 
331  // Pre and Post inc are supported on loads and stores
332  for (unsigned im = (unsigned)ISD::PRE_INC;
334  setIndexedLoadAction(im, VT, Legal);
335  setIndexedStoreAction(im, VT, Legal);
336  setIndexedMaskedLoadAction(im, VT, Legal);
337  setIndexedMaskedStoreAction(im, VT, Legal);
338  }
339 
340  if (HasMVEFP) {
341  setOperationAction(ISD::FMINNUM, VT, Legal);
342  setOperationAction(ISD::FMAXNUM, VT, Legal);
343  setOperationAction(ISD::FROUND, VT, Legal);
344 
345  // No native support for these.
346  setOperationAction(ISD::FDIV, VT, Expand);
347  setOperationAction(ISD::FREM, VT, Expand);
348  setOperationAction(ISD::FSQRT, VT, Expand);
349  setOperationAction(ISD::FSIN, VT, Expand);
350  setOperationAction(ISD::FCOS, VT, Expand);
351  setOperationAction(ISD::FPOW, VT, Expand);
352  setOperationAction(ISD::FLOG, VT, Expand);
353  setOperationAction(ISD::FLOG2, VT, Expand);
354  setOperationAction(ISD::FLOG10, VT, Expand);
355  setOperationAction(ISD::FEXP, VT, Expand);
356  setOperationAction(ISD::FEXP2, VT, Expand);
357  setOperationAction(ISD::FNEARBYINT, VT, Expand);
358  }
359  }
360 
361  // We 'support' these types up to bitcast/load/store level, regardless of
362  // MVE integer-only / float support. Only doing FP data processing on the FP
363  // vector types is inhibited at integer-only level.
364  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
365  for (auto VT : LongTypes) {
366  addRegisterClass(VT, &ARM::MQPRRegClass);
367  setAllExpand(VT);
368  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
369  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
370  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
371  }
372  // We can do bitwise operations on v2i64 vectors
373  setOperationAction(ISD::AND, MVT::v2i64, Legal);
374  setOperationAction(ISD::OR, MVT::v2i64, Legal);
375  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
376 
377  // It is legal to extload from v4i8 to v4i16 or v4i32.
378  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
379  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
380  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
381 
382  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
383  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
384  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
385  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
386  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
387  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
388 
389  // Some truncating stores are legal too.
390  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
391  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
392  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
393 
394  // Pre and Post inc on these are legal, given the correct extends
395  for (unsigned im = (unsigned)ISD::PRE_INC;
397  for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
398  setIndexedLoadAction(im, VT, Legal);
399  setIndexedStoreAction(im, VT, Legal);
400  setIndexedMaskedLoadAction(im, VT, Legal);
401  setIndexedMaskedStoreAction(im, VT, Legal);
402  }
403  }
404 
405  // Predicate types
406  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
407  for (auto VT : pTypes) {
408  addRegisterClass(VT, &ARM::VCCRRegClass);
409  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
410  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
411  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
412  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
413  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
414  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
415  setOperationAction(ISD::SETCC, VT, Custom);
416  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
417  setOperationAction(ISD::LOAD, VT, Custom);
418  setOperationAction(ISD::STORE, VT, Custom);
419  }
420 }
421 
423  const ARMSubtarget &STI)
424  : TargetLowering(TM), Subtarget(&STI) {
425  RegInfo = Subtarget->getRegisterInfo();
426  Itins = Subtarget->getInstrItineraryData();
427 
430 
431  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
432  !Subtarget->isTargetWatchOS()) {
433  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
434  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
435  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
436  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
438  }
439 
440  if (Subtarget->isTargetMachO()) {
441  // Uses VFP for Thumb libfuncs if available.
442  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
443  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
444  static const struct {
445  const RTLIB::Libcall Op;
446  const char * const Name;
447  const ISD::CondCode Cond;
448  } LibraryCalls[] = {
449  // Single-precision floating-point arithmetic.
450  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
451  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
452  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
453  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
454 
455  // Double-precision floating-point arithmetic.
456  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
457  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
458  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
459  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
460 
461  // Single-precision comparisons.
462  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
463  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
464  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
465  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
466  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
467  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
468  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
469 
470  // Double-precision comparisons.
471  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
472  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
473  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
474  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
475  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
476  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
477  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
478 
479  // Floating-point to integer conversions.
480  // i64 conversions are done via library routines even when generating VFP
481  // instructions, so use the same ones.
482  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
483  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
484  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
485  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
486 
487  // Conversions between floating types.
488  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
489  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
490 
491  // Integer to floating-point conversions.
492  // i64 conversions are done via library routines even when generating VFP
493  // instructions, so use the same ones.
494  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
495  // e.g., __floatunsidf vs. __floatunssidfvfp.
496  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
497  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
498  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
499  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
500  };
501 
502  for (const auto &LC : LibraryCalls) {
503  setLibcallName(LC.Op, LC.Name);
504  if (LC.Cond != ISD::SETCC_INVALID)
505  setCmpLibcallCC(LC.Op, LC.Cond);
506  }
507  }
508  }
509 
510  // These libcalls are not available in 32-bit.
511  setLibcallName(RTLIB::SHL_I128, nullptr);
512  setLibcallName(RTLIB::SRL_I128, nullptr);
513  setLibcallName(RTLIB::SRA_I128, nullptr);
514 
515  // RTLIB
516  if (Subtarget->isAAPCS_ABI() &&
517  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
518  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
519  static const struct {
520  const RTLIB::Libcall Op;
521  const char * const Name;
522  const CallingConv::ID CC;
523  const ISD::CondCode Cond;
524  } LibraryCalls[] = {
525  // Double-precision floating-point arithmetic helper functions
526  // RTABI chapter 4.1.2, Table 2
527  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
528  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
529  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
530  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
531 
532  // Double-precision floating-point comparison helper functions
533  // RTABI chapter 4.1.2, Table 3
534  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
535  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
536  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
537  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
538  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
539  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
540  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
541 
542  // Single-precision floating-point arithmetic helper functions
543  // RTABI chapter 4.1.2, Table 4
544  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
545  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
546  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
547  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
548 
549  // Single-precision floating-point comparison helper functions
550  // RTABI chapter 4.1.2, Table 5
551  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
552  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
553  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
554  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
555  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
556  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
557  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
558 
559  // Floating-point to integer conversions.
560  // RTABI chapter 4.1.2, Table 6
561  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
562  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
563  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
564  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
565  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
566  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
567  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
568  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
569 
570  // Conversions between floating types.
571  // RTABI chapter 4.1.2, Table 7
572  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
573  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
574  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
575 
576  // Integer to floating-point conversions.
577  // RTABI chapter 4.1.2, Table 8
578  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
579  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
580  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
581  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
582  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
583  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
584  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
585  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
586 
587  // Long long helper functions
588  // RTABI chapter 4.2, Table 9
589  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
590  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
591  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
592  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593 
594  // Integer division functions
595  // RTABI chapter 4.3.1
596  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
602  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
603  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
604  };
605 
606  for (const auto &LC : LibraryCalls) {
607  setLibcallName(LC.Op, LC.Name);
608  setLibcallCallingConv(LC.Op, LC.CC);
609  if (LC.Cond != ISD::SETCC_INVALID)
610  setCmpLibcallCC(LC.Op, LC.Cond);
611  }
612 
613  // EABI dependent RTLIB
614  if (TM.Options.EABIVersion == EABI::EABI4 ||
616  static const struct {
617  const RTLIB::Libcall Op;
618  const char *const Name;
619  const CallingConv::ID CC;
620  const ISD::CondCode Cond;
621  } MemOpsLibraryCalls[] = {
622  // Memory operations
623  // RTABI chapter 4.3.4
625  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627  };
628 
629  for (const auto &LC : MemOpsLibraryCalls) {
630  setLibcallName(LC.Op, LC.Name);
631  setLibcallCallingConv(LC.Op, LC.CC);
632  if (LC.Cond != ISD::SETCC_INVALID)
633  setCmpLibcallCC(LC.Op, LC.Cond);
634  }
635  }
636  }
637 
638  if (Subtarget->isTargetWindows()) {
639  static const struct {
640  const RTLIB::Libcall Op;
641  const char * const Name;
642  const CallingConv::ID CC;
643  } LibraryCalls[] = {
644  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
645  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
646  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
647  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
648  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
649  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
650  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
651  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
652  };
653 
654  for (const auto &LC : LibraryCalls) {
655  setLibcallName(LC.Op, LC.Name);
656  setLibcallCallingConv(LC.Op, LC.CC);
657  }
658  }
659 
660  // Use divmod compiler-rt calls for iOS 5.0 and later.
661  if (Subtarget->isTargetMachO() &&
662  !(Subtarget->isTargetIOS() &&
663  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
664  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
665  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
666  }
667 
668  // The half <-> float conversion functions are always soft-float on
669  // non-watchos platforms, but are needed for some targets which use a
670  // hard-float calling convention by default.
671  if (!Subtarget->isTargetWatchABI()) {
672  if (Subtarget->isAAPCS_ABI()) {
673  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
674  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
675  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
676  } else {
677  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
678  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
679  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
680  }
681  }
682 
683  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
684  // a __gnu_ prefix (which is the default).
685  if (Subtarget->isTargetAEABI()) {
686  static const struct {
687  const RTLIB::Libcall Op;
688  const char * const Name;
689  const CallingConv::ID CC;
690  } LibraryCalls[] = {
691  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
692  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
693  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
694  };
695 
696  for (const auto &LC : LibraryCalls) {
697  setLibcallName(LC.Op, LC.Name);
698  setLibcallCallingConv(LC.Op, LC.CC);
699  }
700  }
701 
702  if (Subtarget->isThumb1Only())
703  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
704  else
705  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
706 
707  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
708  Subtarget->hasFPRegs()) {
709  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
710  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
711  if (!Subtarget->hasVFP2Base())
712  setAllExpand(MVT::f32);
713  if (!Subtarget->hasFP64())
714  setAllExpand(MVT::f64);
715  }
716 
717  if (Subtarget->hasFullFP16()) {
718  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
722 
725  }
726 
727  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
728  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
729  setTruncStoreAction(VT, InnerVT, Expand);
730  addAllExtLoads(VT, InnerVT, Expand);
731  }
732 
737 
739  }
740 
743 
746 
747  if (Subtarget->hasMVEIntegerOps())
748  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
749 
750  // Combine low-overhead loop intrinsics so that we can lower i1 types.
751  if (Subtarget->hasLOB()) {
754  }
755 
756  if (Subtarget->hasNEON()) {
757  addDRTypeForNEON(MVT::v2f32);
758  addDRTypeForNEON(MVT::v8i8);
759  addDRTypeForNEON(MVT::v4i16);
760  addDRTypeForNEON(MVT::v2i32);
761  addDRTypeForNEON(MVT::v1i64);
762 
763  addQRTypeForNEON(MVT::v4f32);
764  addQRTypeForNEON(MVT::v2f64);
765  addQRTypeForNEON(MVT::v16i8);
766  addQRTypeForNEON(MVT::v8i16);
767  addQRTypeForNEON(MVT::v4i32);
768  addQRTypeForNEON(MVT::v2i64);
769 
770  if (Subtarget->hasFullFP16()) {
771  addQRTypeForNEON(MVT::v8f16);
772  addDRTypeForNEON(MVT::v4f16);
773  }
774  }
775 
776  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
777  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
778  // none of Neon, MVE or VFP supports any arithmetic operations on it.
782  // FIXME: Code duplication: FDIV and FREM are expanded always, see
783  // ARMTargetLowering::addTypeForNEON method for details.
786  // FIXME: Create unittest.
787  // In another words, find a way when "copysign" appears in DAG with vector
788  // operands.
790  // FIXME: Code duplication: SETCC has custom operation action, see
791  // ARMTargetLowering::addTypeForNEON method for details.
793  // FIXME: Create unittest for FNEG and for FABS.
805  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
812  }
813 
814  if (Subtarget->hasNEON()) {
815  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
816  // supported for v4f32.
831 
832  // Mark v2f32 intrinsics.
847 
848  // Neon does not support some operations on v1i64 and v2i64 types.
850  // Custom handling for some quad-vector types to detect VMULL.
854  // Custom handling for some vector types to avoid expensive expansions
859  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
860  // a destination type that is wider than the source, and nor does
861  // it have a FP_TO_[SU]INT instruction with a narrower destination than
862  // source.
871 
874 
875  // NEON does not have single instruction CTPOP for vectors with element
876  // types wider than 8-bits. However, custom lowering can leverage the
877  // v8i8/v16i8 vcnt instruction.
884 
887 
888  // NEON does not have single instruction CTTZ for vectors.
893 
898 
903 
908 
909  // NEON only has FMA instructions as of VFP4.
910  if (!Subtarget->hasVFP4Base()) {
913  }
914 
925 
926  // It is legal to extload from v4i8 to v4i16 or v4i32.
928  MVT::v2i32}) {
933  }
934  }
935  }
936 
937  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
945  }
946 
947  if (!Subtarget->hasFP64()) {
948  // When targeting a floating-point unit with only single-precision
949  // operations, f64 is legal for the few double-precision instructions which
950  // are present However, no double-precision operations other than moves,
951  // loads and stores are provided by the hardware.
988  }
989 
990  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
993  if (Subtarget->hasFullFP16()) {
996  }
997  }
998 
999  if (!Subtarget->hasFP16()) {
1002  }
1003 
1005 
1006  // ARM does not have floating-point extending loads.
1007  for (MVT VT : MVT::fp_valuetypes()) {
1010  }
1011 
1012  // ... or truncating stores
1016 
1017  // ARM does not have i1 sign extending load.
1018  for (MVT VT : MVT::integer_valuetypes())
1020 
1021  // ARM supports all 4 flavors of integer indexed load / store.
1022  if (!Subtarget->isThumb1Only()) {
1023  for (unsigned im = (unsigned)ISD::PRE_INC;
1033  }
1034  } else {
1035  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1038  }
1039 
1044 
1047  if (Subtarget->hasDSP()) {
1052  }
1053  if (Subtarget->hasBaseDSP()) {
1056  }
1057 
1058  // i64 operation support.
1061  if (Subtarget->isThumb1Only()) {
1064  }
1065  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1066  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1068 
1076 
1077  // MVE lowers 64 bit shifts to lsll and lsrl
1078  // assuming that ISD::SRL and SRA of i64 are already marked custom
1079  if (Subtarget->hasMVEIntegerOps())
1081 
1082  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1083  if (Subtarget->isThumb1Only()) {
1087  }
1088 
1089  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1091 
1092  // ARM does not have ROTL.
1094  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1097  }
1100  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1103  }
1104 
1105  // @llvm.readcyclecounter requires the Performance Monitors extension.
1106  // Default to the 0 expansion on unsupported platforms.
1107  // FIXME: Technically there are older ARM CPUs that have
1108  // implementation-specific ways of obtaining this information.
1109  if (Subtarget->hasPerfMon())
1111 
1112  // Only ARMv6 has BSWAP.
1113  if (!Subtarget->hasV6Ops())
1115 
1116  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1117  : Subtarget->hasDivideInARMMode();
1118  if (!hasDivide) {
1119  // These are expanded into libcalls if the cpu doesn't have HW divider.
1122  }
1123 
1124  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1127 
1130  }
1131 
1134 
1135  // Register based DivRem for AEABI (RTABI 4.2)
1136  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1137  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1138  Subtarget->isTargetWindows()) {
1141  HasStandaloneRem = false;
1142 
1143  if (Subtarget->isTargetWindows()) {
1144  const struct {
1145  const RTLIB::Libcall Op;
1146  const char * const Name;
1147  const CallingConv::ID CC;
1148  } LibraryCalls[] = {
1149  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1150  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1151  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1152  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1153 
1154  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1155  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1156  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1157  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1158  };
1159 
1160  for (const auto &LC : LibraryCalls) {
1161  setLibcallName(LC.Op, LC.Name);
1162  setLibcallCallingConv(LC.Op, LC.CC);
1163  }
1164  } else {
1165  const struct {
1166  const RTLIB::Libcall Op;
1167  const char * const Name;
1168  const CallingConv::ID CC;
1169  } LibraryCalls[] = {
1170  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1171  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1172  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1173  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1174 
1175  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1176  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1177  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1178  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1179  };
1180 
1181  for (const auto &LC : LibraryCalls) {
1182  setLibcallName(LC.Op, LC.Name);
1183  setLibcallCallingConv(LC.Op, LC.CC);
1184  }
1185  }
1186 
1191  } else {
1194  }
1195 
1196  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1197  // MSVCRT doesn't have powi; fall back to pow
1198  setLibcallName(RTLIB::POWI_F32, nullptr);
1199  setLibcallName(RTLIB::POWI_F64, nullptr);
1200  }
1201 
1206 
1209 
1210  // Use the default implementation.
1217 
1218  if (Subtarget->isTargetWindows())
1220  else
1222 
1223  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1224  // the default expansion.
1225  InsertFencesForAtomic = false;
1226  if (Subtarget->hasAnyDataBarrier() &&
1227  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1228  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1229  // to ldrex/strex loops already.
1231  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1233 
1234  // On v8, we have particularly efficient implementations of atomic fences
1235  // if they can be combined with nearby atomic loads and stores.
1236  if (!Subtarget->hasAcquireRelease() ||
1237  getTargetMachine().getOptLevel() == 0) {
1238  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1239  InsertFencesForAtomic = true;
1240  }
1241  } else {
1242  // If there's anything we can use as a barrier, go through custom lowering
1243  // for ATOMIC_FENCE.
1244  // If target has DMB in thumb, Fences can be inserted.
1245  if (Subtarget->hasDataBarrier())
1246  InsertFencesForAtomic = true;
1247 
1249  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1250 
1251  // Set them all for expansion, which will force libcalls.
1264  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1265  // Unordered/Monotonic case.
1266  if (!InsertFencesForAtomic) {
1269  }
1270  }
1271 
1273 
1274  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1275  if (!Subtarget->hasV6Ops()) {
1278  }
1280 
1281  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1282  !Subtarget->isThumb1Only()) {
1283  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1284  // iff target supports vfp2.
1287  }
1288 
1289  // We want to custom lower some of our intrinsics.
1294  if (Subtarget->useSjLjEH())
1295  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1296 
1306  if (Subtarget->hasFullFP16()) {
1310  }
1311 
1313 
1316  if (Subtarget->hasFullFP16())
1321 
1322  // We don't support sin/cos/fmod/copysign/pow
1331  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1332  !Subtarget->isThumb1Only()) {
1335  }
1338 
1339  if (!Subtarget->hasVFP4Base()) {
1342  }
1343 
1344  // Various VFP goodness
1345  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1346  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1347  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1350  }
1351 
1352  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1353  if (!Subtarget->hasFP16()) {
1356  }
1357 
1358  // Strict floating-point comparisons need custom lowering.
1365  }
1366 
1367  // Use __sincos_stret if available.
1368  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1369  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1372  }
1373 
1374  // FP-ARMv8 implements a lot of rounding-like FP operations.
1375  if (Subtarget->hasFPARMv8Base()) {
1384  if (Subtarget->hasNEON()) {
1389  }
1390 
1391  if (Subtarget->hasFP64()) {
1400  }
1401  }
1402 
1403  // FP16 often need to be promoted to call lib functions
1404  if (Subtarget->hasFullFP16()) {
1417 
1419  }
1420 
1421  if (Subtarget->hasNEON()) {
1422  // vmin and vmax aren't available in a scalar form, so we use
1423  // a NEON instruction with an undef lane instead.
1432 
1433  if (Subtarget->hasFullFP16()) {
1438 
1443  }
1444  }
1445 
1446  // We have target-specific dag combine patterns for the following nodes:
1447  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1454 
1455  if (Subtarget->hasV6Ops())
1457  if (Subtarget->isThumb1Only())
1459 
1461 
1462  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1463  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1465  else
1467 
1468  //// temporary - rewrite interface to use type
1469  MaxStoresPerMemset = 8;
1471  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1473  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1475 
1476  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1477  // are at least 4 bytes aligned.
1479 
1480  // Prefer likely predicted branches to selects on out-of-order cores.
1481  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1482 
1483  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1484 
1485  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1486 
1487  if (Subtarget->isThumb() || Subtarget->isThumb2())
1489 }
1490 
1492  return Subtarget->useSoftFloat();
1493 }
1494 
1495 // FIXME: It might make sense to define the representative register class as the
1496 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1497 // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
1498 // SPR's representative would be DPR_VFP2. This should work well if register
1499 // pressure tracking were modified such that a register use would increment the
1500 // pressure of the register class's representative and all of it's super
1501 // classes' representatives transitively. We have not implemented this because
1502 // of the difficulty prior to coalescing of modeling operand register classes
1503 // due to the common occurrence of cross class copies and subregister insertions
1504 // and extractions.
1505 std::pair<const TargetRegisterClass *, uint8_t>
1507  MVT VT) const {
1508  const TargetRegisterClass *RRC = nullptr;
1509  uint8_t Cost = 1;
1510  switch (VT.SimpleTy) {
1511  default:
1513  // Use DPR as representative register class for all floating point
1514  // and vector types. Since there are 32 SPR registers and 32 DPR registers so
1515  // the cost is 1 for both f32 and f64.
1516  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1517  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1518  RRC = &ARM::DPRRegClass;
1519  // When NEON is used for SP, only half of the register file is available
1520  // because operations that define both SP and DP results will be constrained
1521  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1522  // coalescing by double-counting the SP regs. See the FIXME above.
1523  if (Subtarget->useNEONForSinglePrecisionFP())
1524  Cost = 2;
1525  break;
1526  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1527  case MVT::v4f32: case MVT::v2f64:
1528  RRC = &ARM::DPRRegClass;
1529  Cost = 2;
1530  break;
1531  case MVT::v4i64:
1532  RRC = &ARM::DPRRegClass;
1533  Cost = 4;
1534  break;
1535  case MVT::v8i64:
1536  RRC = &ARM::DPRRegClass;
1537  Cost = 8;
1538  break;
1539  }
1540  return std::make_pair(RRC, Cost);
1541 }
1542 
1543 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1544  switch ((ARMISD::NodeType)Opcode) {
1545  case ARMISD::FIRST_NUMBER: break;
1546  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1547  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1548  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1549  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1550  case ARMISD::CALL: return "ARMISD::CALL";
1551  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1552  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1553  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1554  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1555  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1556  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1557  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1558  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1559  case ARMISD::CMP: return "ARMISD::CMP";
1560  case ARMISD::CMN: return "ARMISD::CMN";
1561  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1562  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1563  case ARMISD::CMPFPE: return "ARMISD::CMPFPE";
1564  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1565  case ARMISD::CMPFPEw0: return "ARMISD::CMPFPEw0";
1566  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1567  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1568 
1569  case ARMISD::CMOV: return "ARMISD::CMOV";
1570  case ARMISD::SUBS: return "ARMISD::SUBS";
1571 
1572  case ARMISD::SSAT: return "ARMISD::SSAT";
1573  case ARMISD::USAT: return "ARMISD::USAT";
1574 
1575  case ARMISD::ASRL: return "ARMISD::ASRL";
1576  case ARMISD::LSRL: return "ARMISD::LSRL";
1577  case ARMISD::LSLL: return "ARMISD::LSLL";
1578 
1579  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1580  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1581  case ARMISD::RRX: return "ARMISD::RRX";
1582 
1583  case ARMISD::ADDC: return "ARMISD::ADDC";
1584  case ARMISD::ADDE: return "ARMISD::ADDE";
1585  case ARMISD::SUBC: return "ARMISD::SUBC";
1586  case ARMISD::SUBE: return "ARMISD::SUBE";
1587  case ARMISD::LSLS: return "ARMISD::LSLS";
1588 
1589  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1590  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1591  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1592  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1593  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1594 
1595  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1596  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1597  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1598 
1599  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1600 
1601  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1602 
1603  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1604 
1605  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1606 
1607  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1608 
1609  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1610  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1611 
1612  case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
1613  case ARMISD::VCMP: return "ARMISD::VCMP";
1614  case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
1615  case ARMISD::VTST: return "ARMISD::VTST";
1616 
1617  case ARMISD::VSHLs: return "ARMISD::VSHLs";
1618  case ARMISD::VSHLu: return "ARMISD::VSHLu";
1619  case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1620  case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1621  case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1622  case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1623  case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1624  case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1625  case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1626  case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1627  case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1628  case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1629  case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1630  case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1631  case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1632  case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1633  case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1634  case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1635  case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1636  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1637  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1638  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1639  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1640  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1641  case ARMISD::VDUP: return "ARMISD::VDUP";
1642  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1643  case ARMISD::VEXT: return "ARMISD::VEXT";
1644  case ARMISD::VREV64: return "ARMISD::VREV64";
1645  case ARMISD::VREV32: return "ARMISD::VREV32";
1646  case ARMISD::VREV16: return "ARMISD::VREV16";
1647  case ARMISD::VZIP: return "ARMISD::VZIP";
1648  case ARMISD::VUZP: return "ARMISD::VUZP";
1649  case ARMISD::VTRN: return "ARMISD::VTRN";
1650  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1651  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1652  case ARMISD::VMOVN: return "ARMISD::VMOVN";
1653  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1654  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1655  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1656  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1657  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1658  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1659  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1660  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1661  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1662  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1663  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1664  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1665  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1666  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1667  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1668  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1669  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1670  case ARMISD::QADD16b: return "ARMISD::QADD16b";
1671  case ARMISD::QSUB16b: return "ARMISD::QSUB16b";
1672  case ARMISD::QADD8b: return "ARMISD::QADD8b";
1673  case ARMISD::QSUB8b: return "ARMISD::QSUB8b";
1674  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1675  case ARMISD::BFI: return "ARMISD::BFI";
1676  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1677  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1678  case ARMISD::VBSL: return "ARMISD::VBSL";
1679  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1680  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1681  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1682  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1683  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1684  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1685  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1686  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1687  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1688  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1689  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1690  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1691  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1692  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1693  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1694  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1695  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1696  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1697  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1698  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1699  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1700  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1701  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1702  case ARMISD::WLS: return "ARMISD::WLS";
1703  case ARMISD::LE: return "ARMISD::LE";
1704  case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
1705  case ARMISD::CSINV: return "ARMISD::CSINV";
1706  case ARMISD::CSNEG: return "ARMISD::CSNEG";
1707  case ARMISD::CSINC: return "ARMISD::CSINC";
1708  }
1709  return nullptr;
1710 }
1711 
1713  EVT VT) const {
1714  if (!VT.isVector())
1715  return getPointerTy(DL);
1716 
1717  // MVE has a predicate register.
1718  if (Subtarget->hasMVEIntegerOps() &&
1719  (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
1722 }
1723 
1724 /// getRegClassFor - Return the register class that should be used for the
1725 /// specified value type.
1726 const TargetRegisterClass *
1727 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1728  (void)isDivergent;
1729  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1730  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1731  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1732  // MVE Q registers.
1733  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1734  if (VT == MVT::v4i64)
1735  return &ARM::QQPRRegClass;
1736  if (VT == MVT::v8i64)
1737  return &ARM::QQQQPRRegClass;
1738  }
1739  return TargetLowering::getRegClassFor(VT);
1740 }
1741 
1742 // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
1743 // source/dest is aligned and the copy size is large enough. We therefore want
1744 // to align such objects passed to memory intrinsics.
1746  unsigned &PrefAlign) const {
1747  if (!isa<MemIntrinsic>(CI))
1748  return false;
1749  MinSize = 8;
1750  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1751  // cycle faster than 4-byte aligned LDM.
1752  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1753  return true;
1754 }
1755 
1756 // Create a fast isel object.
1757 FastISel *
1759  const TargetLibraryInfo *libInfo) const {
1760  return ARM::createFastISel(funcInfo, libInfo);
1761 }
1762 
1764  unsigned NumVals = N->getNumValues();
1765  if (!NumVals)
1766  return Sched::RegPressure;
1767 
1768  for (unsigned i = 0; i != NumVals; ++i) {
1769  EVT VT = N->getValueType(i);
1770  if (VT == MVT::Glue || VT == MVT::Other)
1771  continue;
1772  if (VT.isFloatingPoint() || VT.isVector())
1773  return Sched::ILP;
1774  }
1775 
1776  if (!N->isMachineOpcode())
1777  return Sched::RegPressure;
1778 
1779  // Load are scheduled for latency even if there instruction itinerary
1780  // is not available.
1781  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1782  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1783 
1784  if (MCID.getNumDefs() == 0)
1785  return Sched::RegPressure;
1786  if (!Itins->isEmpty() &&
1787  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1788  return Sched::ILP;
1789 
1790  return Sched::RegPressure;
1791 }
1792 
1793 //===----------------------------------------------------------------------===//
1794 // Lowering Code
1795 //===----------------------------------------------------------------------===//
1796 
1797 static bool isSRL16(const SDValue &Op) {
1798  if (Op.getOpcode() != ISD::SRL)
1799  return false;
1800  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1801  return Const->getZExtValue() == 16;
1802  return false;
1803 }
1804 
1805 static bool isSRA16(const SDValue &Op) {
1806  if (Op.getOpcode() != ISD::SRA)
1807  return false;
1808  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1809  return Const->getZExtValue() == 16;
1810  return false;
1811 }
1812 
1813 static bool isSHL16(const SDValue &Op) {
1814  if (Op.getOpcode() != ISD::SHL)
1815  return false;
1816  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1817  return Const->getZExtValue() == 16;
1818  return false;
1819 }
1820 
1821 // Check for a signed 16-bit value. We special case SRA because it makes it
1822 // more simple when also looking for SRAs that aren't sign extending a
1823 // smaller value. Without the check, we'd need to take extra care with
1824 // checking order for some operations.
1825 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1826  if (isSRA16(Op))
1827  return isSHL16(Op.getOperand(0));
1828  return DAG.ComputeNumSignBits(Op) == 17;
1829 }
1830 
1831 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1833  switch (CC) {
1834  default: llvm_unreachable("Unknown condition code!");
1835  case ISD::SETNE: return ARMCC::NE;
1836  case ISD::SETEQ: return ARMCC::EQ;
1837  case ISD::SETGT: return ARMCC::GT;
1838  case ISD::SETGE: return ARMCC::GE;
1839  case ISD::SETLT: return ARMCC::LT;
1840  case ISD::SETLE: return ARMCC::LE;
1841  case ISD::SETUGT: return ARMCC::HI;
1842  case ISD::SETUGE: return ARMCC::HS;
1843  case ISD::SETULT: return ARMCC::LO;
1844  case ISD::SETULE: return ARMCC::LS;
1845  }
1846 }
1847 
1848 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1849 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1850  ARMCC::CondCodes &CondCode2) {
1851  CondCode2 = ARMCC::AL;
1852  switch (CC) {
1853  default: llvm_unreachable("Unknown FP condition!");
1854  case ISD::SETEQ:
1855  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1856  case ISD::SETGT:
1857  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1858  case ISD::SETGE:
1859  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1860  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1861  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1862  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1863  case ISD::SETO: CondCode = ARMCC::VC; break;
1864  case ISD::SETUO: CondCode = ARMCC::VS; break;
1865  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1866  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1867  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1868  case ISD::SETLT:
1869  case ISD::SETULT: CondCode = ARMCC::LT; break;
1870  case ISD::SETLE:
1871  case ISD::SETULE: CondCode = ARMCC::LE; break;
1872  case ISD::SETNE:
1873  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1874  }
1875 }
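// Note the double conditions above: after a VFP compare, SETONE has no single
// ARM condition, so it is checked as MI followed by GT via CondCode2, and
// SETUEQ as EQ followed by VS. Callers emit a second predicated instruction
// whenever CondCode2 != ARMCC::AL.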
1876 
1877 //===----------------------------------------------------------------------===//
1878 // Calling Convention Implementation
1879 //===----------------------------------------------------------------------===//
1880 
1881 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1882 /// account presence of floating point hardware and calling convention
1883 /// limitations, such as support for variadic functions.
1884 CallingConv::ID
1885 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1886  bool isVarArg) const {
1887  switch (CC) {
1888  default:
1889  report_fatal_error("Unsupported calling convention");
1890  case CallingConv::ARM_AAPCS:
1891  case CallingConv::ARM_APCS:
1892  case CallingConv::GHC:
1893  case CallingConv::CFGuard_Check:
1894  return CC;
1895  case CallingConv::PreserveMost:
1896  return CallingConv::PreserveMost;
1897  case CallingConv::ARM_AAPCS_VFP:
1898  case CallingConv::Swift:
1899  return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1900  case CallingConv::C:
1901  if (!Subtarget->isAAPCS_ABI())
1902  return CallingConv::ARM_APCS;
1903  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
1904  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1905  !isVarArg)
1906  return CallingConv::ARM_AAPCS_VFP;
1907  else
1908  return CallingConv::ARM_AAPCS;
1909  case CallingConv::Fast:
1910  case CallingConv::CXX_FAST_TLS:
1911  if (!Subtarget->isAAPCS_ABI()) {
1912  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
1913  return CallingConv::Fast;
1914  return CallingConv::ARM_APCS;
1915  } else if (Subtarget->hasVFP2Base() &&
1916  !Subtarget->isThumb1Only() && !isVarArg)
1917  return CallingConv::ARM_AAPCS_VFP;
1918  else
1919  return CallingConv::ARM_AAPCS;
1920  }
1921 }
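// For instance, under the AAPCS ABI on a target with VFP2 (and not
// Thumb1-only), a non-variadic C function with a hard float ABI maps to
// ARM_AAPCS_VFP, while a variadic one falls back to ARM_AAPCS, since
// variadic arguments are never passed in floating-point registers.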
1922 
1923 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1924  bool isVarArg) const {
1925  return CCAssignFnForNode(CC, false, isVarArg);
1926 }
1927 
1928 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1929  bool isVarArg) const {
1930  return CCAssignFnForNode(CC, true, isVarArg);
1931 }
1932 
1933 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1934 /// CallingConvention.
1935 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1936  bool Return,
1937  bool isVarArg) const {
1938  switch (getEffectiveCallingConv(CC, isVarArg)) {
1939  default:
1940  report_fatal_error("Unsupported calling convention");
1941  case CallingConv::ARM_APCS:
1942  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1943  case CallingConv::ARM_AAPCS:
1944  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1945  case CallingConv::ARM_AAPCS_VFP:
1946  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1947  case CallingConv::Fast:
1948  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1949  case CallingConv::GHC:
1950  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1951  case CallingConv::PreserveMost:
1952  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1953  case CallingConv::CFGuard_Check:
1954  return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
1955  }
1956 }
1957 
1958 /// LowerCallResult - Lower the result values of a call into the
1959 /// appropriate copies out of appropriate physical registers.
1960 SDValue ARMTargetLowering::LowerCallResult(
1961  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1962  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1963  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1964  SDValue ThisVal) const {
1965  // Assign locations to each value returned by this call.
1966  SmallVector<CCValAssign, 16> RVLocs;
1967  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1968  *DAG.getContext());
1969  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1970 
1971  // Copy all of the result registers out of their specified physreg.
1972  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1973  CCValAssign VA = RVLocs[i];
1974 
1975  // Pass the 'this' value directly from the argument to the return value,
1976  // to avoid register unit interference.
1977  if (i == 0 && isThisReturn) {
1978  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1979  "unexpected return calling convention register assignment");
1980  InVals.push_back(ThisVal);
1981  continue;
1982  }
1983 
1984  SDValue Val;
1985  if (VA.needsCustom()) {
1986  // Handle f64 or half of a v2f64.
1987  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1988  InFlag);
1989  Chain = Lo.getValue(1);
1990  InFlag = Lo.getValue(2);
1991  VA = RVLocs[++i]; // skip ahead to next loc
1992  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1993  InFlag);
1994  Chain = Hi.getValue(1);
1995  InFlag = Hi.getValue(2);
1996  if (!Subtarget->isLittle())
1997  std::swap (Lo, Hi);
1998  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1999 
2000  if (VA.getLocVT() == MVT::v2f64) {
2001  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2002  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2003  DAG.getConstant(0, dl, MVT::i32));
2004 
2005  VA = RVLocs[++i]; // skip ahead to next loc
2006  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2007  Chain = Lo.getValue(1);
2008  InFlag = Lo.getValue(2);
2009  VA = RVLocs[++i]; // skip ahead to next loc
2010  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2011  Chain = Hi.getValue(1);
2012  InFlag = Hi.getValue(2);
2013  if (!Subtarget->isLittle())
2014  std::swap (Lo, Hi);
2015  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2016  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2017  DAG.getConstant(1, dl, MVT::i32));
2018  }
2019  } else {
2020  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2021  InFlag);
2022  Chain = Val.getValue(1);
2023  InFlag = Val.getValue(2);
2024  }
2025 
2026  switch (VA.getLocInfo()) {
2027  default: llvm_unreachable("Unknown loc info!");
2028  case CCValAssign::Full: break;
2029  case CCValAssign::BCvt:
2030  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2031  break;
2032  }
2033 
2034  InVals.push_back(Val);
2035  }
2036 
2037  return Chain;
2038 }
2039 
2040 /// LowerMemOpCallTo - Store the argument to the stack.
2041 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
2042  SDValue Arg, const SDLoc &dl,
2043  SelectionDAG &DAG,
2044  const CCValAssign &VA,
2045  ISD::ArgFlagsTy Flags) const {
2046  unsigned LocMemOffset = VA.getLocMemOffset();
2047  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2048  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2049  StackPtr, PtrOff);
2050  return DAG.getStore(
2051  Chain, dl, Arg, PtrOff,
2052  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
2053 }
2054 
2055 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2056  SDValue Chain, SDValue &Arg,
2057  RegsToPassVector &RegsToPass,
2058  CCValAssign &VA, CCValAssign &NextVA,
2059  SDValue &StackPtr,
2060  SmallVectorImpl<SDValue> &MemOpChains,
2061  ISD::ArgFlagsTy Flags) const {
2062  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2063  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2064  unsigned id = Subtarget->isLittle() ? 0 : 1;
2065  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2066 
2067  if (NextVA.isRegLoc())
2068  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2069  else {
2070  assert(NextVA.isMemLoc());
2071  if (!StackPtr.getNode())
2072  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2073  getPointerTy(DAG.getDataLayout()));
2074 
2075  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
2076  dl, DAG, NextVA,
2077  Flags));
2078  }
2079 }
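// Concretely: an f64 argument assigned to r0/r1 is split by the VMOVRRD
// above so that, on little-endian targets, r0 receives the low word and r1
// the high word (the words swap for big-endian). If only one GPR remains,
// the leftover word is stored to the outgoing-argument stack area instead.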
2080 
2081 /// LowerCall - Lower a call into a callseq_start <-
2082 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2083 /// nodes.
2084 SDValue
2085 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2086  SmallVectorImpl<SDValue> &InVals) const {
2087  SelectionDAG &DAG = CLI.DAG;
2088  SDLoc &dl = CLI.DL;
2089  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2090  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2091  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2092  SDValue Chain = CLI.Chain;
2093  SDValue Callee = CLI.Callee;
2094  bool &isTailCall = CLI.IsTailCall;
2095  CallingConv::ID CallConv = CLI.CallConv;
2096  bool doesNotRet = CLI.DoesNotReturn;
2097  bool isVarArg = CLI.IsVarArg;
2098 
2099  MachineFunction &MF = DAG.getMachineFunction();
2100  MachineFunction::CallSiteInfo CSInfo;
2101  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2102  bool isThisReturn = false;
2103  bool PreferIndirect = false;
2104 
2105  // Disable tail calls if they're not supported.
2106  if (!Subtarget->supportsTailCall())
2107  isTailCall = false;
2108 
2109  if (isa<GlobalAddressSDNode>(Callee)) {
2110  // If we're optimizing for minimum size and the function is called three or
2111  // more times in this block, we can improve codesize by calling indirectly
2112  // as BLXr has a 16-bit encoding.
2113  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2114  if (CLI.CS) {
2115  auto *BB = CLI.CS.getParent();
2116  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2117  count_if(GV->users(), [&BB](const User *U) {
2118  return isa<Instruction>(U) &&
2119  cast<Instruction>(U)->getParent() == BB;
2120  }) > 2;
2121  }
2122  }
2123  if (isTailCall) {
2124  // Check if it's really possible to do a tail call.
2125  isTailCall = IsEligibleForTailCallOptimization(
2126  Callee, CallConv, isVarArg, isStructRet,
2127  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2128  PreferIndirect);
2129  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
2130  report_fatal_error("failed to perform tail call elimination on a call "
2131  "site marked musttail");
2132  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2133  // detected sibcalls.
2134  if (isTailCall)
2135  ++NumTailCalls;
2136  }
2137 
2138  // Analyze operands of the call, assigning locations to each operand.
2139  SmallVector<CCValAssign, 16> ArgLocs;
2140  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2141  *DAG.getContext());
2142  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2143 
2144  // Get a count of how many bytes are to be pushed on the stack.
2145  unsigned NumBytes = CCInfo.getNextStackOffset();
2146 
2147  if (isTailCall) {
2148  // For tail calls, memory operands are available in our caller's stack.
2149  NumBytes = 0;
2150  } else {
2151  // Adjust the stack pointer for the new arguments...
2152  // These operations are automatically eliminated by the prolog/epilog pass
2153  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2154  }
2155 
2156  SDValue StackPtr =
2157  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2158 
2159  RegsToPassVector RegsToPass;
2160  SmallVector<SDValue, 8> MemOpChains;
2161 
2162  // Walk the register/memloc assignments, inserting copies/loads. In the case
2163  // of tail call optimization, arguments are handled later.
2164  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2165  i != e;
2166  ++i, ++realArgIdx) {
2167  CCValAssign &VA = ArgLocs[i];
2168  SDValue Arg = OutVals[realArgIdx];
2169  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2170  bool isByVal = Flags.isByVal();
2171 
2172  // Promote the value if needed.
2173  switch (VA.getLocInfo()) {
2174  default: llvm_unreachable("Unknown loc info!");
2175  case CCValAssign::Full: break;
2176  case CCValAssign::SExt:
2177  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2178  break;
2179  case CCValAssign::ZExt:
2180  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2181  break;
2182  case CCValAssign::AExt:
2183  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2184  break;
2185  case CCValAssign::BCvt:
2186  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2187  break;
2188  }
2189 
2190  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2191  if (VA.needsCustom()) {
2192  if (VA.getLocVT() == MVT::v2f64) {
2193  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2194  DAG.getConstant(0, dl, MVT::i32));
2195  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2196  DAG.getConstant(1, dl, MVT::i32));
2197 
2198  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
2199  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2200 
2201  VA = ArgLocs[++i]; // skip ahead to next loc
2202  if (VA.isRegLoc()) {
2203  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
2204  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2205  } else {
2206  assert(VA.isMemLoc());
2207 
2208  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
2209  dl, DAG, VA, Flags));
2210  }
2211  } else {
2212  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2213  StackPtr, MemOpChains, Flags);
2214  }
2215  } else if (VA.isRegLoc()) {
2216  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2217  Outs[0].VT == MVT::i32) {
2218  assert(VA.getLocVT() == MVT::i32 &&
2219  "unexpected calling convention register assignment");
2220  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2221  "unexpected use of 'returned'");
2222  isThisReturn = true;
2223  }
2224  const TargetOptions &Options = DAG.getTarget().Options;
2225  if (Options.EnableDebugEntryValues)
2226  CSInfo.emplace_back(VA.getLocReg(), i);
2227  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2228  } else if (isByVal) {
2229  assert(VA.isMemLoc());
2230  unsigned offset = 0;
2231 
2232  // True if this byval aggregate will be split between registers
2233  // and memory.
2234  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2235  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2236 
2237  if (CurByValIdx < ByValArgsCount) {
2238 
2239  unsigned RegBegin, RegEnd;
2240  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2241 
2242  EVT PtrVT =
2243  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2244  unsigned int i, j;
2245  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2246  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2247  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2248  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
2249  MachinePointerInfo(),
2250  DAG.InferPtrAlignment(AddArg));
2251  MemOpChains.push_back(Load.getValue(1));
2252  RegsToPass.push_back(std::make_pair(j, Load));
2253  }
2254 
2255  // If the parameter size exceeds the register area, the "offset" value
2256  // helps us to calculate the stack slot for the remaining part properly.
2257  offset = RegEnd - RegBegin;
2258 
2259  CCInfo.nextInRegsParam();
2260  }
2261 
2262  if (Flags.getByValSize() > 4*offset) {
2263  auto PtrVT = getPointerTy(DAG.getDataLayout());
2264  unsigned LocMemOffset = VA.getLocMemOffset();
2265  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2266  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2267  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2268  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2269  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2270  MVT::i32);
2271  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
2272  MVT::i32);
2273 
2274  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2275  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2276  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2277  Ops));
2278  }
2279  } else if (!isTailCall) {
2280  assert(VA.isMemLoc());
2281 
2282  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2283  dl, DAG, VA, Flags));
2284  }
2285  }
2286 
2287  if (!MemOpChains.empty())
2288  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2289 
2290  // Build a sequence of copy-to-reg nodes chained together with token chain
2291  // and flag operands which copy the outgoing args into the appropriate regs.
2292  SDValue InFlag;
2293  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2294  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2295  RegsToPass[i].second, InFlag);
2296  InFlag = Chain.getValue(1);
2297  }
2298 
2299  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2300  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2301  // node so that legalize doesn't hack it.
2302  bool isDirect = false;
2303 
2304  const TargetMachine &TM = getTargetMachine();
2305  const Module *Mod = MF.getFunction().getParent();
2306  const GlobalValue *GV = nullptr;
2307  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2308  GV = G->getGlobal();
2309  bool isStub =
2310  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2311 
2312  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2313  bool isLocalARMFunc = false;
2314  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2315  auto PtrVt = getPointerTy(DAG.getDataLayout());
2316 
2317  if (Subtarget->genLongCalls()) {
2318  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2319  "long-calls codegen is not position independent!");
2320  // Handle a global address or an external symbol. If it's not one of
2321  // those, the target's already in a register, so we don't need to do
2322  // anything extra.
2323  if (isa<GlobalAddressSDNode>(Callee)) {
2324  // Create a constant pool entry for the callee address
2325  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2326  ARMConstantPoolValue *CPV =
2327  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2328 
2329  // Get the address of the callee into a register
2330  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2331  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2332  Callee = DAG.getLoad(
2333  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2334  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2335  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2336  const char *Sym = S->getSymbol();
2337 
2338  // Create a constant pool entry for the callee address
2339  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2340  ARMConstantPoolValue *CPV =
2341  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2342  ARMPCLabelIndex, 0);
2343  // Get the address of the callee into a register
2344  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2345  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2346  Callee = DAG.getLoad(
2347  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2348  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2349  }
2350  } else if (isa<GlobalAddressSDNode>(Callee)) {
2351  if (!PreferIndirect) {
2352  isDirect = true;
2353  bool isDef = GV->isStrongDefinitionForLinker();
2354 
2355  // ARM call to a local ARM function is predicable.
2356  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2357  // tBX takes a register source operand.
2358  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2359  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2360  Callee = DAG.getNode(
2361  ARMISD::WrapperPIC, dl, PtrVt,
2362  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2363  Callee = DAG.getLoad(
2364  PtrVt, dl, DAG.getEntryNode(), Callee,
2365  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2366  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2367  MachineMemOperand::MOInvariant);
2368  } else if (Subtarget->isTargetCOFF()) {
2369  assert(Subtarget->isTargetWindows() &&
2370  "Windows is the only supported COFF target");
2371  unsigned TargetFlags = ARMII::MO_NO_FLAG;
2372  if (GV->hasDLLImportStorageClass())
2373  TargetFlags = ARMII::MO_DLLIMPORT;
2374  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2375  TargetFlags = ARMII::MO_COFFSTUB;
2376  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2377  TargetFlags);
2378  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2379  Callee =
2380  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2381  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2382  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2383  } else {
2384  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2385  }
2386  }
2387  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2388  isDirect = true;
2389  // tBX takes a register source operand.
2390  const char *Sym = S->getSymbol();
2391  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2392  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2393  ARMConstantPoolValue *CPV =
2394  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2395  ARMPCLabelIndex, 4);
2396  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2397  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2398  Callee = DAG.getLoad(
2399  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2400  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2401  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2402  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2403  } else {
2404  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2405  }
2406  }
2407 
2408  // FIXME: handle tail calls differently.
2409  unsigned CallOpc;
2410  if (Subtarget->isThumb()) {
2411  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2412  CallOpc = ARMISD::CALL_NOLINK;
2413  else
2414  CallOpc = ARMISD::CALL;
2415  } else {
2416  if (!isDirect && !Subtarget->hasV5TOps())
2417  CallOpc = ARMISD::CALL_NOLINK;
2418  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2419  // Emit regular call when code size is the priority
2420  !Subtarget->hasMinSize())
2421  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2422  CallOpc = ARMISD::CALL_NOLINK;
2423  else
2424  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2425  }
2426 
2427  std::vector<SDValue> Ops;
2428  Ops.push_back(Chain);
2429  Ops.push_back(Callee);
2430 
2431  // Add argument registers to the end of the list so that they are known live
2432  // into the call.
2433  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2434  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2435  RegsToPass[i].second.getValueType()));
2436 
2437  // Add a register mask operand representing the call-preserved registers.
2438  if (!isTailCall) {
2439  const uint32_t *Mask;
2440  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2441  if (isThisReturn) {
2442  // For 'this' returns, use the R0-preserving mask if applicable
2443  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2444  if (!Mask) {
2445  // Set isThisReturn to false if the calling convention is not one that
2446  // allows 'returned' to be modeled in this way, so LowerCallResult does
2447  // not try to pass 'this' straight through
2448  isThisReturn = false;
2449  Mask = ARI->getCallPreservedMask(MF, CallConv);
2450  }
2451  } else
2452  Mask = ARI->getCallPreservedMask(MF, CallConv);
2453 
2454  assert(Mask && "Missing call preserved mask for calling convention");
2455  Ops.push_back(DAG.getRegisterMask(Mask));
2456  }
2457 
2458  if (InFlag.getNode())
2459  Ops.push_back(InFlag);
2460 
2461  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2462  if (isTailCall) {
2463  MF.getFrameInfo().setHasTailCall();
2464  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2465  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2466  return Ret;
2467  }
2468 
2469  // Returns a chain and a flag for retval copy to use.
2470  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2471  InFlag = Chain.getValue(1);
2472  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2473 
2474  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2475  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2476  if (!Ins.empty())
2477  InFlag = Chain.getValue(1);
2478 
2479  // Handle result values, copying them out of physregs into vregs that we
2480  // return.
2481  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2482  InVals, isThisReturn,
2483  isThisReturn ? OutVals[0] : SDValue());
2484 }
2485 
2486 /// HandleByVal - Every parameter *after* a byval parameter is passed
2487 /// on the stack. Remember the next parameter register to allocate,
2488 /// and then confiscate the rest of the parameter registers to ensure
2489 /// this.
2490 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2491  unsigned Align) const {
2492  // Byval (as with any stack) slots are always at least 4 byte aligned.
2493  Align = std::max(Align, 4U);
2494 
2495  unsigned Reg = State->AllocateReg(GPRArgRegs);
2496  if (!Reg)
2497  return;
2498 
2499  unsigned AlignInRegs = Align / 4;
2500  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2501  for (unsigned i = 0; i < Waste; ++i)
2502  Reg = State->AllocateReg(GPRArgRegs);
2503 
2504  if (!Reg)
2505  return;
2506 
2507  unsigned Excess = 4 * (ARM::R4 - Reg);
2508 
2509  // Special case when NSAA != SP and the parameter size is greater than the
2510  // size of all remaining GPRs. In that case we can't split the parameter;
2511  // we must send it to the stack. We also must set the NCRN to R4, wasting
2512  // all remaining registers.
2513  const unsigned NSAAOffset = State->getNextStackOffset();
2514  if (NSAAOffset != 0 && Size > Excess) {
2515  while (State->AllocateReg(GPRArgRegs))
2516  ;
2517  return;
2518  }
2519 
2520  // The first register for a byval parameter is the first register that
2521  // wasn't allocated before this method was called, so it is "reg".
2522  // If the parameter is small enough to be saved in the range [reg, r4), then
2523  // the end (first-past-last) register is reg + param-size-in-regs;
2524  // otherwise the parameter is split between registers and the stack,
2525  // and the end register is r4 in that case.
2526  unsigned ByValRegBegin = Reg;
2527  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2528  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2529  // Note: the first register was already allocated at the beginning of this
2530  // method, so allocate the remaining registers we need.
2531  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2532  State->AllocateReg(GPRArgRegs);
2533  // A byval parameter that is split between registers and memory needs its
2534  // size truncated here.
2535  // In the case where the entire structure fits in registers, we set the
2536  // size in memory to zero.
2537  Size = std::max<int>(Size - Excess, 0);
2538 }
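// Worked example: with r1 as the next free register, NSAA == SP, and a
// 20-byte byval of 4-byte alignment, the range [r1, r4) is recorded for the
// parameter (12 bytes in r1-r3) and Size is truncated to 20 - 12 = 8, the
// portion that still has to live on the stack.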
2539 
2540 /// MatchingStackOffset - Return true if the given stack call argument is
2541 /// already available in the same position (relatively) of the caller's
2542 /// incoming argument stack.
2543 static
2544 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2545  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2546  const TargetInstrInfo *TII) {
2547  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2548  int FI = std::numeric_limits<int>::max();
2549  if (Arg.getOpcode() == ISD::CopyFromReg) {
2550  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2551  if (!Register::isVirtualRegister(VR))
2552  return false;
2553  MachineInstr *Def = MRI->getVRegDef(VR);
2554  if (!Def)
2555  return false;
2556  if (!Flags.isByVal()) {
2557  if (!TII->isLoadFromStackSlot(*Def, FI))
2558  return false;
2559  } else {
2560  return false;
2561  }
2562  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2563  if (Flags.isByVal())
2564  // ByVal argument is passed in as a pointer but it's now being
2565  // dereferenced. e.g.
2566  // define @foo(%struct.X* %A) {
2567  // tail call @bar(%struct.X* byval %A)
2568  // }
2569  return false;
2570  SDValue Ptr = Ld->getBasePtr();
2571  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2572  if (!FINode)
2573  return false;
2574  FI = FINode->getIndex();
2575  } else
2576  return false;
2577 
2578  assert(FI != std::numeric_limits<int>::max());
2579  if (!MFI.isFixedObjectIndex(FI))
2580  return false;
2581  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2582 }
2583 
2584 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2585 /// for tail call optimization. Targets which want to do tail call
2586 /// optimization should implement this function.
2587 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2588  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2589  bool isCalleeStructRet, bool isCallerStructRet,
2590  const SmallVectorImpl<ISD::OutputArg> &Outs,
2591  const SmallVectorImpl<SDValue> &OutVals,
2592  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2593  const bool isIndirect) const {
2594  MachineFunction &MF = DAG.getMachineFunction();
2595  const Function &CallerF = MF.getFunction();
2596  CallingConv::ID CallerCC = CallerF.getCallingConv();
2597 
2598  assert(Subtarget->supportsTailCall());
2599 
2600  // Indirect tail calls cannot be optimized for Thumb1 if the args
2601  // to the call take up r0-r3. The reason is that there are no legal registers
2602  // left to hold the pointer to the function to be called.
2603  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2604  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2605  return false;
2606 
2607  // Look for obvious safe cases to perform tail call optimization that do not
2608  // require ABI changes. This is what gcc calls sibcall.
2609 
2610  // Exception-handling functions need a special set of instructions to indicate
2611  // a return to the hardware. Tail-calling another function would probably
2612  // break this.
2613  if (CallerF.hasFnAttribute("interrupt"))
2614  return false;
2615 
2616  // Also avoid sibcall optimization if either caller or callee uses struct
2617  // return semantics.
2618  if (isCalleeStructRet || isCallerStructRet)
2619  return false;
2620 
2621  // Externally-defined functions with weak linkage should not be
2622  // tail-called on ARM when the OS does not support dynamic
2623  // pre-emption of symbols, as the AAELF spec requires normal calls
2624  // to undefined weak functions to be replaced with a NOP or jump to the
2625  // next instruction. The behaviour of branch instructions in this
2626  // situation (as used for tail calls) is implementation-defined, so we
2627  // cannot rely on the linker replacing the tail call with a return.
2628  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2629  const GlobalValue *GV = G->getGlobal();
2630  const Triple &TT = getTargetMachine().getTargetTriple();
2631  if (GV->hasExternalWeakLinkage() &&
2632  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2633  return false;
2634  }
2635 
2636  // Check that the call results are passed in the same way.
2637  LLVMContext &C = *DAG.getContext();
2638  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2639  CCAssignFnForReturn(CalleeCC, isVarArg),
2640  CCAssignFnForReturn(CallerCC, isVarArg)))
2641  return false;
2642  // The callee has to preserve all registers the caller needs to preserve.
2643  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2644  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2645  if (CalleeCC != CallerCC) {
2646  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2647  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2648  return false;
2649  }
2650 
2651  // If Caller's vararg or byval argument has been split between registers and
2652  // stack, do not perform tail call, since part of the argument is in caller's
2653  // local frame.
2654  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2655  if (AFI_Caller->getArgRegsSaveSize())
2656  return false;
2657 
2658  // If the callee takes no arguments then go on to check the results of the
2659  // call.
2660  if (!Outs.empty()) {
2661  // Check if stack adjustment is needed. For now, do not do this if any
2662  // argument is passed on the stack.
2663  SmallVector<CCValAssign, 16> ArgLocs;
2664  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2665  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2666  if (CCInfo.getNextStackOffset()) {
2667  // Check if the arguments are already laid out in the right way as
2668  // the caller's fixed stack objects.
2669  MachineFrameInfo &MFI = MF.getFrameInfo();
2670  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2671  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2672  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2673  i != e;
2674  ++i, ++realArgIdx) {
2675  CCValAssign &VA = ArgLocs[i];
2676  EVT RegVT = VA.getLocVT();
2677  SDValue Arg = OutVals[realArgIdx];
2678  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2679  if (VA.getLocInfo() == CCValAssign::Indirect)
2680  return false;
2681  if (VA.needsCustom()) {
2682  // f64 and vector types are split into multiple registers or
2683  // register/stack-slot combinations. The types will not match
2684  // the registers; give up on memory f64 refs until we figure
2685  // out what to do about this.
2686  if (!VA.isRegLoc())
2687  return false;
2688  if (!ArgLocs[++i].isRegLoc())
2689  return false;
2690  if (RegVT == MVT::v2f64) {
2691  if (!ArgLocs[++i].isRegLoc())
2692  return false;
2693  if (!ArgLocs[++i].isRegLoc())
2694  return false;
2695  }
2696  } else if (!VA.isRegLoc()) {
2697  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2698  MFI, MRI, TII))
2699  return false;
2700  }
2701  }
2702  }
2703 
2704  const MachineRegisterInfo &MRI = MF.getRegInfo();
2705  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2706  return false;
2707  }
2708 
2709  return true;
2710 }
2711 
2712 bool
2713 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2714  MachineFunction &MF, bool isVarArg,
2715  const SmallVectorImpl<ISD::OutputArg> &Outs,
2716  LLVMContext &Context) const {
2717  SmallVector<CCValAssign, 16> RVLocs;
2718  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2719  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2720 }
2721 
2722 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2723  const SDLoc &DL, SelectionDAG &DAG) {
2724  const MachineFunction &MF = DAG.getMachineFunction();
2725  const Function &F = MF.getFunction();
2726 
2727  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2728 
2729  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2730  // version of the "preferred return address". These offsets affect the return
2731  // instruction if this is a return from PL1 without hypervisor extensions.
2732  // IRQ/FIQ: +4 "subs pc, lr, #4"
2733  // SWI: 0 "subs pc, lr, #0"
2734  // ABORT: +4 "subs pc, lr, #4"
2735  // UNDEF: +4/+2 "subs pc, lr, #0"
2736 // UNDEF varies depending on whether the exception came from ARM or Thumb
2737  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2738 
2739  int64_t LROffset;
2740  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2741  IntKind == "ABORT")
2742  LROffset = 4;
2743  else if (IntKind == "SWI" || IntKind == "UNDEF")
2744  LROffset = 0;
2745  else
2746  report_fatal_error("Unsupported interrupt attribute. If present, value "
2747  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2748 
2749  RetOps.insert(RetOps.begin() + 1,
2750  DAG.getConstant(LROffset, DL, MVT::i32, false));
2751 
2752  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2753 }
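// For example, a handler carrying the "interrupt"="IRQ" attribute returns
// with "subs pc, lr, #4": the INTRET_FLAG node carries the LR adjustment
// selected above as an extra operand.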
2754 
2755 SDValue
2756 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2757  bool isVarArg,
2758  const SmallVectorImpl<ISD::OutputArg> &Outs,
2759  const SmallVectorImpl<SDValue> &OutVals,
2760  const SDLoc &dl, SelectionDAG &DAG) const {
2761  // CCValAssign - represent the assignment of the return value to a location.
2762  SmallVector<CCValAssign, 16> RVLocs;
2763 
2764  // CCState - Info about the registers and stack slots.
2765  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2766  *DAG.getContext());
2767 
2768  // Analyze outgoing return values.
2769  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2770 
2771  SDValue Flag;
2772  SmallVector<SDValue, 4> RetOps;
2773  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2774  bool isLittleEndian = Subtarget->isLittle();
2775 
2776  MachineFunction &MF = DAG.getMachineFunction();
2777  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2778  AFI->setReturnRegsCount(RVLocs.size());
2779 
2780  // Copy the result values into the output registers.
2781  for (unsigned i = 0, realRVLocIdx = 0;
2782  i != RVLocs.size();
2783  ++i, ++realRVLocIdx) {
2784  CCValAssign &VA = RVLocs[i];
2785  assert(VA.isRegLoc() && "Can only return in registers!");
2786 
2787  SDValue Arg = OutVals[realRVLocIdx];
2788  bool ReturnF16 = false;
2789 
2790  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2791  // Half-precision return values can be returned like this:
2792  //
2793  // t11: f16 = fadd ...
2794  // t12: i16 = bitcast t11
2795  // t13: i32 = zero_extend t12
2796  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2797  //
2798  // to avoid code generation for bitcasts, we simply set Arg to the node
2799  // that produces the f16 value, t11 in this case.
2800  //
2801  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2802  SDValue ZE = Arg.getOperand(0);
2803  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2804  SDValue BC = ZE.getOperand(0);
2805  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2806  Arg = BC.getOperand(0);
2807  ReturnF16 = true;
2808  }
2809  }
2810  }
2811  }
2812 
2813  switch (VA.getLocInfo()) {
2814  default: llvm_unreachable("Unknown loc info!");
2815  case CCValAssign::Full: break;
2816  case CCValAssign::BCvt:
2817  if (!ReturnF16)
2818  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2819  break;
2820  }
2821 
2822  if (VA.needsCustom()) {
2823  if (VA.getLocVT() == MVT::v2f64) {
2824  // Extract the first half and return it in two registers.
2825  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2826  DAG.getConstant(0, dl, MVT::i32));
2827  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2828  DAG.getVTList(MVT::i32, MVT::i32), Half);
2829 
2830  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2831  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2832  Flag);
2833  Flag = Chain.getValue(1);
2834  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2835  VA = RVLocs[++i]; // skip ahead to next loc
2836  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2837  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2838  Flag);
2839  Flag = Chain.getValue(1);
2840  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2841  VA = RVLocs[++i]; // skip ahead to next loc
2842 
2843  // Extract the 2nd half and fall through to handle it as an f64 value.
2844  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2845  DAG.getConstant(1, dl, MVT::i32));
2846  }
2847  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2848  // available.
2849  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2850  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2851  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2852  fmrrd.getValue(isLittleEndian ? 0 : 1),
2853  Flag);
2854  Flag = Chain.getValue(1);
2855  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2856  VA = RVLocs[++i]; // skip ahead to next loc
2857  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2858  fmrrd.getValue(isLittleEndian ? 1 : 0),
2859  Flag);
2860  } else
2861  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2862 
2863  // Guarantee that all emitted copies are glued together, so the return
2864  // copies cannot be scheduled apart or interleaved with other code.
2865  Flag = Chain.getValue(1);
2866  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2867  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2868  }
2869  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2870  const MCPhysReg *I =
2871  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction().getFunction());
2872  if (I) {
2873  for (; *I; ++I) {
2874  if (ARM::GPRRegClass.contains(*I))
2875  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2876  else if (ARM::DPRRegClass.contains(*I))
2877  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2878  else
2879  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2880  }
2881  }
2882 
2883  // Update chain and glue.
2884  RetOps[0] = Chain;
2885  if (Flag.getNode())
2886  RetOps.push_back(Flag);
2887 
2888  // CPUs which aren't M-class use a special sequence to return from
2889  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2890  // though we use "subs pc, lr, #N").
2891  //
2892  // M-class CPUs actually use a normal return sequence with a special
2893  // (hardware-provided) value in LR, so the normal code path works.
2894  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2895  !Subtarget->isMClass()) {
2896  if (Subtarget->isThumb1Only())
2897  report_fatal_error("interrupt attribute is not supported in Thumb1");
2898  return LowerInterruptReturn(RetOps, dl, DAG);
2899  }
2900 
2901  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2902 }
2903 
2904 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2905  if (N->getNumValues() != 1)
2906  return false;
2907  if (!N->hasNUsesOfValue(1, 0))
2908  return false;
2909 
2910  SDValue TCChain = Chain;
2911  SDNode *Copy = *N->use_begin();
2912  if (Copy->getOpcode() == ISD::CopyToReg) {
2913  // If the copy has a glue operand, we conservatively assume it isn't safe to
2914  // perform a tail call.
2915  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2916  return false;
2917  TCChain = Copy->getOperand(0);
2918  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2919  SDNode *VMov = Copy;
2920  // f64 returned in a pair of GPRs.
2921  SmallPtrSet<SDNode*, 2> Copies;
2922  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2923  UI != UE; ++UI) {
2924  if (UI->getOpcode() != ISD::CopyToReg)
2925  return false;
2926  Copies.insert(*UI);
2927  }
2928  if (Copies.size() > 2)
2929  return false;
2930 
2931  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2932  UI != UE; ++UI) {
2933  SDValue UseChain = UI->getOperand(0);
2934  if (Copies.count(UseChain.getNode()))
2935  // Second CopyToReg
2936  Copy = *UI;
2937  else {
2938  // We are at the top of this chain.
2939  // If the copy has a glue operand, we conservatively assume it
2940  // isn't safe to perform a tail call.
2941  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2942  return false;
2943  // First CopyToReg
2944  TCChain = UseChain;
2945  }
2946  }
2947  } else if (Copy->getOpcode() == ISD::BITCAST) {
2948  // f32 returned in a single GPR.
2949  if (!Copy->hasOneUse())
2950  return false;
2951  Copy = *Copy->use_begin();
2952  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2953  return false;
2954  // If the copy has a glue operand, we conservatively assume it isn't safe to
2955  // perform a tail call.
2956  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2957  return false;
2958  TCChain = Copy->getOperand(0);
2959  } else {
2960  return false;
2961  }
2962 
2963  bool HasRet = false;
2964  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2965  UI != UE; ++UI) {
2966  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2967  UI->getOpcode() != ARMISD::INTRET_FLAG)
2968  return false;
2969  HasRet = true;
2970  }
2971 
2972  if (!HasRet)
2973  return false;
2974 
2975  Chain = TCChain;
2976  return true;
2977 }
2978 
2979 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2980  if (!Subtarget->supportsTailCall())
2981  return false;
2982 
2983  if (!CI->isTailCall())
2984  return false;
2985 
2986  return true;
2987 }
2988 
2989 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2990 // values first, and pass the low and high parts through.
2991 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2992  SDLoc DL(Op);
2993  SDValue WriteValue = Op->getOperand(2);
2994 
2995  // This function is only supposed to be called for i64 type argument.
2996  assert(WriteValue.getValueType() == MVT::i64
2997  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2998 
2999  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3000  DAG.getConstant(0, DL, MVT::i32));
3001  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3002  DAG.getConstant(1, DL, MVT::i32));
3003  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3004  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3005 }
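// For example, a call to @llvm.write_register.i64 with an i64 value %v is
// rewritten here so the resulting WRITE_REGISTER node receives %v as two
// i32 operands: EXTRACT_ELEMENT 0 (the low word) and EXTRACT_ELEMENT 1 (the
// high word).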
3006 
3007 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3008 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3009 // one of the above mentioned nodes. It has to be wrapped because otherwise
3010 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3011 // be used to form addressing mode. These wrapped nodes will be selected
3012 // into MOVi.
3013 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3014  SelectionDAG &DAG) const {
3015  EVT PtrVT = Op.getValueType();
3016  // FIXME there is no actual debug info here
3017  SDLoc dl(Op);
3018  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3019  SDValue Res;
3020 
3021  // When generating execute-only code Constant Pools must be promoted to the
3022  // global data section. It's a bit ugly that we can't share them across basic
3023  // blocks, but this way we guarantee that execute-only behaves correctly with
3024  // position-independent addressing modes.
3025  if (Subtarget->genExecuteOnly()) {
3026  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3027  auto T = const_cast<Type*>(CP->getType());
3028  auto C = const_cast<Constant*>(CP->getConstVal());
3029  auto M = const_cast<Module*>(DAG.getMachineFunction().
3030  getFunction().getParent());
3031  auto GV = new GlobalVariable(
3032  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3033  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3034  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3035  Twine(AFI->createPICLabelUId())
3036  );
3037  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3038  dl, PtrVT);
3039  return LowerGlobalAddress(GA, DAG);
3040  }
3041 
3042  if (CP->isMachineConstantPoolEntry())
3043  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
3044  CP->getAlignment());
3045  else
3046  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
3047  CP->getAlignment());
3048  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3049 }
3050 
3051 unsigned ARMTargetLowering::getJumpTableEncoding() const {
3052  return MachineJumpTableInfo::EK_Inline;
3053 }
3054 
3055 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3056  SelectionDAG &DAG) const {
3057  MachineFunction &MF = DAG.getMachineFunction();
3058  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3059  unsigned ARMPCLabelIndex = 0;
3060  SDLoc DL(Op);
3061  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3062  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3063  SDValue CPAddr;
3064  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3065  if (!IsPositionIndependent) {
3066  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
3067  } else {
3068  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3069  ARMPCLabelIndex = AFI->createPICLabelUId();
3070  ARMConstantPoolValue *CPV =
3071  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3072  ARMCP::CPBlockAddress, PCAdj);
3073  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3074  }
3075  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3076  SDValue Result = DAG.getLoad(
3077  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3078  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3079  if (!IsPositionIndependent)
3080  return Result;
3081  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3082  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3083 }
3084 
3085 /// Convert a TLS address reference into the correct sequence of loads
3086 /// and calls to compute the variable's address for Darwin, and return an
3087 /// SDValue containing the final node.
3088 
3089 /// Darwin only has one TLS scheme which must be capable of dealing with the
3090 /// fully general situation, in the worst case. This means:
3091 /// + "extern __thread" declaration.
3092 /// + Defined in a possibly unknown dynamic library.
3093 ///
3094 /// The general system is that each __thread variable has a [3 x i32] descriptor
3095 /// which contains information used by the runtime to calculate the address. The
3096 /// only part of this the compiler needs to know about is the first word, which
3097 /// contains a function pointer that must be called with the address of the
3098 /// entire descriptor in "r0".
3099 ///
3100 /// Since this descriptor may be in a different unit, in general access must
3101 /// proceed along the usual ARM rules. A common sequence to produce is:
3102 ///
3103 /// movw rT1, :lower16:_var$non_lazy_ptr
3104 /// movt rT1, :upper16:_var$non_lazy_ptr
3105 /// ldr r0, [rT1]
3106 /// ldr rT2, [r0]
3107 /// blx rT2
3108 /// [...address now in r0...]
3109 SDValue
3110 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3111  SelectionDAG &DAG) const {
3112  assert(Subtarget->isTargetDarwin() &&
3113  "This function expects a Darwin target");
3114  SDLoc DL(Op);
3115 
3116  // The first step is to get the address of the actual global symbol. This
3117  // is where the TLS descriptor lives.
3118  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3119 
3120  // The first entry in the descriptor is a function pointer that we must call
3121  // to obtain the address of the variable.
3122  SDValue Chain = DAG.getEntryNode();
3123  SDValue FuncTLVGet = DAG.getLoad(
3124  MVT::i32, DL, Chain, DescAddr,
3125  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3126  /* Alignment = */ 4,
3127  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3128  MachineMemOperand::MOInvariant);
3129  Chain = FuncTLVGet.getValue(1);
3130 
3131  MachineFunction &F = DAG.getMachineFunction();
3132  MachineFrameInfo &MFI = F.getFrameInfo();
3133  MFI.setAdjustsStack(true);
3134 
3135  // TLS calls preserve all registers except those that absolutely must be
3136  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3137  // silly).
3138  auto TRI =
3139  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3140  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3141  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3142 
3143  // Finally, we can make the call. This is just a degenerate version of a
3144  // normal ARM call node: r0 takes the address of the descriptor, and
3145  // returns the address of the variable in this thread.
3146  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3147  Chain =
3148  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3149  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3150  DAG.getRegisterMask(Mask), Chain.getValue(1));
3151  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3152 }
3153 
3154 SDValue
3155 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3156  SelectionDAG &DAG) const {
3157  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3158 
3159  SDValue Chain = DAG.getEntryNode();
3160  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3161  SDLoc DL(Op);
3162 
3163  // Load the current TEB (thread environment block)
3164  SDValue Ops[] = {Chain,
3165  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3166  DAG.getTargetConstant(15, DL, MVT::i32),
3167  DAG.getTargetConstant(0, DL, MVT::i32),
3168  DAG.getTargetConstant(13, DL, MVT::i32),
3169  DAG.getTargetConstant(0, DL, MVT::i32),
3170  DAG.getTargetConstant(2, DL, MVT::i32)};
3171  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3172  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3173 
3174  SDValue TEB = CurrentTEB.getValue(0);
3175  Chain = CurrentTEB.getValue(1);
3176 
3177  // Load the ThreadLocalStoragePointer from the TEB
3178  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3179  SDValue TLSArray =
3180  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3181  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3182 
3183  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3184  // offset into the TLSArray.
3185 
3186  // Load the TLS index from the C runtime
3187  SDValue TLSIndex =
3188  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3189  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3190  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3191 
3192  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3193  DAG.getConstant(2, DL, MVT::i32));
3194  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3195  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3196  MachinePointerInfo());
3197 
3198  // Get the offset of the start of the .tls section (section base)
3199  const auto *GA = cast<GlobalAddressSDNode>(Op);
3200  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3201  SDValue Offset = DAG.getLoad(
3202  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3203  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
3205 
3206  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3207 }
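// Roughly, the nodes built above correspond to this sequence (register
// names are illustrative only):
//   mrc p15, #0, rT, c13, c0, #2    ; rT = TEB, via the CP15 thread ID reg
//   ldr rA, [rT, #0x2c]             ; rA = ThreadLocalStoragePointer
//   ldr rI, [_tls_index]            ; rI = this module's TLS index
//   ldr rS, [rA, rI, lsl #2]        ; rS = base of this thread's TLS data
//   add rD, rS, rO                  ; rO = SECREL offset of the variable,
//                                   ;   loaded from the constant pool above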
3208 
3209 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3210 SDValue
3211 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3212  SelectionDAG &DAG) const {
3213  SDLoc dl(GA);
3214  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3215  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3216  MachineFunction &MF = DAG.getMachineFunction();
3217  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3218  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3219  ARMConstantPoolValue *CPV =
3220  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3221  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3222  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3223  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3224  Argument = DAG.getLoad(
3225  PtrVT, dl, DAG.getEntryNode(), Argument,
3226  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3227  SDValue Chain = Argument.getValue(1);
3228 
3229  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3230  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3231 
3232  // call __tls_get_addr.
3233  ArgListTy Args;
3234  ArgListEntry Entry;
3235  Entry.Node = Argument;
3236  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3237  Args.push_back(Entry);
3238 
3239  // FIXME: is there useful debug info available here?
3240  TargetLowering::CallLoweringInfo CLI(DAG);
3241  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3242  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3243  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3244 
3245  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3246  return CallResult.first;
3247 }
3248 
3249 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3250 // "local exec" model.
3251 SDValue
3252 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3253  SelectionDAG &DAG,
3254  TLSModel::Model model) const {
3255  const GlobalValue *GV = GA->getGlobal();
3256  SDLoc dl(GA);
3257  SDValue Offset;
3258  SDValue Chain = DAG.getEntryNode();
3259  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3260  // Get the Thread Pointer
3261  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3262 
3263  if (model == TLSModel::InitialExec) {
3264  MachineFunction &MF = DAG.getMachineFunction();
3265  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3266  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3267  // Initial exec model.
3268  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3269  ARMConstantPoolValue *CPV =
3270  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3271  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3272  true);
3273  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3274  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3275  Offset = DAG.getLoad(
3276  PtrVT, dl, Chain, Offset,
3277  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3278  Chain = Offset.getValue(1);
3279 
3280  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3281  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3282 
3283  Offset = DAG.getLoad(
3284  PtrVT, dl, Chain, Offset,
3285  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3286  } else {
3287  // local exec model
3288  assert(model == TLSModel::LocalExec);
3289  ARMConstantPoolValue *CPV =
3290  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3291  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3292  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3293  Offset = DAG.getLoad(
3294  PtrVT, dl, Chain, Offset,
3295  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3296  }
3297 
3298  // The address of the thread local variable is the add of the thread
3299  // pointer with the offset of the variable.
3300  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3301 }
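// In the initial-exec case this builds, roughly: a PC-relative constant-pool
// load of the GOTTPOFF entry, a PIC_ADD of the label, a load of the
// TP-relative offset from the GOT, and a final ADD to the thread pointer
// (ARMISD::THREAD_POINTER, typically a CP15 thread-ID register read). The
// local-exec case loads the TPOFF value straight from the constant pool,
// with no GOT indirection, before the same final ADD.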
3302 
3303 SDValue
3304 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3305  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3306  if (DAG.getTarget().useEmulatedTLS())
3307  return LowerToTLSEmulatedModel(GA, DAG);
3308 
3309  if (Subtarget->isTargetDarwin())
3310  return LowerGlobalTLSAddressDarwin(Op, DAG);
3311 
3312  if (Subtarget->isTargetWindows())
3313  return LowerGlobalTLSAddressWindows(Op, DAG);
3314 
3315  // TODO: implement the "local dynamic" model
3316  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3317  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3318 
3319  switch (model) {
3320  case TLSModel::GeneralDynamic:
3321  case TLSModel::LocalDynamic:
3322  return LowerToTLSGeneralDynamicModel(GA, DAG);
3323  case TLSModel::InitialExec:
3324  case TLSModel::LocalExec:
3325  return LowerToTLSExecModels(GA, DAG, model);
3326  }
3327  llvm_unreachable("bogus TLS model");
3328 }
3329 
3330 /// Return true if all users of V are within function F, looking through
3331 /// ConstantExprs.
3332 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3333  SmallVector<const User*,4> Worklist;
3334  for (auto *U : V->users())
3335  Worklist.push_back(U);
3336  while (!Worklist.empty()) {
3337  auto *U = Worklist.pop_back_val();
3338  if (isa<ConstantExpr>(U)) {
3339  for (auto *UU : U->users())
3340  Worklist.push_back(UU);
3341  continue;
3342  }
3343 
3344  auto *I = dyn_cast<Instruction>(U);
3345  if (!I || I->getParent()->getParent() != F)
3346  return false;
3347  }
3348  return true;
3349 }
3350 
3351 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3352  const GlobalValue *GV, SelectionDAG &DAG,
3353  EVT PtrVT, const SDLoc &dl) {
3354  // If we're creating a pool entry for a constant global with unnamed address,
3355  // and the global is small enough, we can emit it inline into the constant pool
3356  // to save ourselves an indirection.
3357  //
3358  // This is a win if the constant is only used in one function (so it doesn't
3359  // need to be duplicated) or duplicating the constant wouldn't increase code
3360  // size (implying the constant is no larger than 4 bytes).
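// For example (assuming the criteria checked below all hold), a
//   static const char Tag[4] = "ok\n";
// used by a single function can be emitted straight into that function's
// constant pool instead of as a separate global plus an address load.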
3361  const Function &F = DAG.getMachineFunction().getFunction();
3362 
3363  // We rely on this decision to inline being idempotent and unrelated to the
3364  // use-site. We know that if we inline a variable at one use site, we'll
3365  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3366  // doesn't know about this optimization, so bail out if it's enabled, else
3367  // we could decide to inline here (and thus never emit the GV) while code
3368  // generated by fast-isel still requires the GV.
3369  if (!EnableConstpoolPromotion ||
3370  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3371  return SDValue();
3372 
3373  auto *GVar = dyn_cast<GlobalVariable>(GV);
3374  if (!GVar || !GVar->hasInitializer() ||
3375  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3376  !GVar->hasLocalLinkage())
3377  return SDValue();
3378 
3379  // If we inline a value that contains relocations, we move the relocations
3380  // from .data to .text. This is not allowed in position-independent code.
3381  auto *Init = GVar->getInitializer();
3382  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3383  Init->needsRelocation())
3384  return SDValue();
3385 
3386  // The constant islands pass can only really deal with alignment requests
3387  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3388  // any type requiring alignment greater than 4 bytes. We also can only
3389  // promote constants that are multiples of 4 bytes in size or are paddable
3390  // to a multiple of 4. Currently we only try to pad constants that are
3391  // strings, for simplicity.
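// E.g. a 6-byte string initializer gets RequiredPadding == 2 and is later
// padded with two zero bytes to form an 8-byte pool entry.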
3392  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3393  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3394  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3395  unsigned RequiredPadding = 4 - (Size % 4);
3396  bool PaddingPossible =
3397  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3398  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3399  Size == 0)
3400  return SDValue();
3401 
3402  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3403  MachineFunction &MF = DAG.getMachineFunction();
3404  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3405 
3406  // We can't bloat the constant pool too much, else the ConstantIslands pass
3407  // may fail to converge. If we haven't promoted this global yet (it may have
3408  // multiple uses), and promoting it would increase the constant pool size (Sz
3409  // > 4), ensure we have space to do so up to MaxTotal.
3410  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3411  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3412  ConstpoolPromotionMaxTotal)
3413  return SDValue();
3414 
3415  // This is only valid if all users are in a single function; we can't clone
3416  // the constant in general. The LLVM IR unnamed_addr allows merging
3417  // constants, but not cloning them.
3418  //
3419  // We could potentially allow cloning if we could prove all uses of the
3420  // constant in the current function don't care about the address, like
3421  // printf format strings. But that isn't implemented for now.
3422  if (!allUsersAreInFunction(GVar, &F))
3423  return SDValue();
3424 
3425  // We're going to inline this global. Pad it out if needed.
3426  if (RequiredPadding != 4) {
3427  StringRef S = CDAInit->getAsString();
3428 
3429  SmallVector<uint8_t,16> V(S.size());
3430  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3431  while (RequiredPadding--)
3432  V.push_back(0);
3433  Init = ConstantDataArray::get(*DAG.getContext(), V);
3434  }
3435 
3436  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3437  SDValue CPAddr =
3438  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3439  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3440  AFI->markGlobalAsPromotedToConstantPool(GVar);
3441  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3442  PaddedSize - 4);
3443  }
3444  ++NumConstpoolPromoted;
3445  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3446 }
3447 
3448 bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3449  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3450  if (!(GV = GA->getBaseObject()))
3451  return false;
3452  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3453  return V->isConstant();
3454  return isa<Function>(GV);
3455 }
3456 
3457 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3458  SelectionDAG &DAG) const {
3459  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3460  default: llvm_unreachable("unknown object format");
3461  case Triple::COFF:
3462  return LowerGlobalAddressWindows(Op, DAG);
3463  case Triple::ELF:
3464  return LowerGlobalAddressELF(Op, DAG);
3465  case Triple::MachO:
3466  return LowerGlobalAddressDarwin(Op, DAG);
3467  }
3468 }
3469 
3470 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3471  SelectionDAG &DAG) const {
3472  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3473  SDLoc dl(Op);
3474  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3475  const TargetMachine &TM = getTargetMachine();
3476  bool IsRO = isReadOnly(GV);
3477 
3478  // promoteToConstantPool only if not generating XO text section
3479  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3480  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3481  return V;
3482 
3483  if (isPositionIndependent()) {
3484  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3485  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3486  UseGOT_PREL ? ARMII::MO_GOT : 0);
3487  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3488  if (UseGOT_PREL)
3489  Result =
3490  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3491  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3492  return Result;
3493  } else if (Subtarget->isROPI() && IsRO) {
3494  // PC-relative.
3495  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3496  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3497  return Result;
3498  } else if (Subtarget->isRWPI() && !IsRO) {
3499  // SB-relative.
3500  SDValue RelAddr;
3501  if (Subtarget->useMovt()) {
3502  ++NumMovwMovt;
3503  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3504  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3505  } else { // use literal pool for address constant
3506  ARMConstantPoolValue *CPV =
3507  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3508  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3509  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3510  RelAddr = DAG.getLoad(
3511  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3512  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3513  }
3514  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3515  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3516  return Result;
3517  }
3518 
3519  // If we have T2 ops, we can materialize the address directly via movt/movw
3520  // pair. This is always cheaper.
3521  if (Subtarget->useMovt()) {
3522  ++NumMovwMovt;
3523  // FIXME: Once remat is capable of dealing with instructions with register
3524  // operands, expand this into two nodes.
3525  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3526  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3527  } else {
3528  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3529  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3530  return DAG.getLoad(
3531  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3532  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3533  }
3534 }
3535 
3536 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3537  SelectionDAG &DAG) const {
3538  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3539  "ROPI/RWPI not currently supported for Darwin");
3540  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3541  SDLoc dl(Op);
3542  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3543 
3544  if (Subtarget->useMovt())
3545  ++NumMovwMovt;
3546 
3547  // FIXME: Once remat is capable of dealing with instructions with register
3548  // operands, expand this into multiple nodes
3549  unsigned Wrapper =
3550  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3551 
3552  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3553  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3554 
3555  if (Subtarget->isGVIndirectSymbol(GV))
3556  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3557  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3558  return Result;
3559 }
3560 
3561 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3562  SelectionDAG &DAG) const {
3563  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3564  assert(Subtarget->useMovt() &&
3565  "Windows on ARM expects to use movw/movt");
3566  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3567  "ROPI/RWPI not currently supported for Windows");
3568 
3569  const TargetMachine &TM = getTargetMachine();
3570  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3571  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3572  if (GV->hasDLLImportStorageClass())
3573  TargetFlags = ARMII::MO_DLLIMPORT;
3574  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3575  TargetFlags = ARMII::MO_COFFSTUB;
3576  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3577  SDValue Result;
3578  SDLoc DL(Op);
3579 
3580  ++NumMovwMovt;
3581 
3582  // FIXME: Once remat is capable of dealing with instructions with register
3583  // operands, expand this into two nodes.
3584  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3585  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3586  TargetFlags));
3587  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3588  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3589  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3590  return Result;
3591 }
3592 
3593 SDValue
3594 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3595  SDLoc dl(Op);
3596  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3597  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3598  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3599  Op.getOperand(1), Val);
3600 }
3601 
3602 SDValue
3603 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3604  SDLoc dl(Op);
3605  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3606  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3607 }
3608 
3609 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3610  SelectionDAG &DAG) const {
3611  SDLoc dl(Op);
3612  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3613  Op.getOperand(0));
3614 }
3615 
3616 SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3617  SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
3618  unsigned IntNo =
3619  cast<ConstantSDNode>(
3620  Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
3621  ->getZExtValue();
3622  switch (IntNo) {
3623  default:
3624  return SDValue(); // Don't custom lower most intrinsics.
3625  case Intrinsic::arm_gnu_eabi_mcount: {
3626  MachineFunction &MF = DAG.getMachineFunction();
3627  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3628  SDLoc dl(Op);
3629  SDValue Chain = Op.getOperand(0);
3630  // call "\01__gnu_mcount_nc"
3631  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3632  const uint32_t *Mask =
3633  ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3634  assert(Mask && "Missing call preserved mask for calling convention");
3635  // Mark LR an implicit live-in.
3636  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3637  SDValue ReturnAddress =
3638  DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3639  std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
3640  SDValue Callee =
3641  DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
3642  SDValue RegisterMask = DAG.getRegisterMask(Mask);
3643  if (Subtarget->isThumb())
3644  return SDValue(
3645  DAG.getMachineNode(
3646  ARM::tBL_PUSHLR, dl, ResultTys,
3647  {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3648  DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3649  0);
3650  return SDValue(
3651  DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
3652  {ReturnAddress, Callee, RegisterMask, Chain}),
3653  0);
3654  }
3655  }
3656 }
3657 
3658 SDValue
3659 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3660  const ARMSubtarget *Subtarget) const {
3661  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3662  SDLoc dl(Op);
3663  switch (IntNo) {
3664  default: return SDValue(); // Don't custom lower most intrinsics.
3665  case Intrinsic::thread_pointer: {
3666  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3667  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3668  }
3669  case Intrinsic::arm_cls: {
3670  const SDValue &Operand = Op.getOperand(1);
3671  const EVT VTy = Op.getValueType();
3672  SDValue SRA =
3673  DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
3674  SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
3675  SDValue SHL =
3676  DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
3677  SDValue OR =
3678  DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
3679  SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
3680  return Result;
3681  }
3682  case Intrinsic::arm_cls64: {
3683  // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
3684  // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
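// Worked example: for x = 0xFFFFFFFF80000000, hi(x) = 0xFFFFFFFF and
// cls(hi) == 31, so the result is 31 + clz(not(lo)) = 31 + clz(0x7FFFFFFF)
// = 31 + 1 = 32 leading sign bits (excluding the sign bit itself).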
3685  const SDValue &Operand = Op.getOperand(1);
3686  const EVT VTy = Op.getValueType();
3687 
3688  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
3689  DAG.getConstant(1, dl, VTy));
3690  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
3691  DAG.getConstant(0, dl, VTy));
3692  SDValue Constant0 = DAG.getConstant(0, dl, VTy);
3693  SDValue Constant1 = DAG.getConstant(1, dl, VTy);
3694  SDValue Constant31 = DAG.getConstant(31, dl, VTy);
3695  SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
3696  SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
3697  SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
3698  SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
3699  SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
3700  SDValue CheckLo =
3701  DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
3702  SDValue HiIsZero =
3703  DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
3704  SDValue AdjustedLo =
3705  DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
3706  SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
3707  SDValue Result =
3708  DAG.getSelect(dl, VTy, CheckLo,
3709  DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
3710  return Result;
3711  }
3712  case Intrinsic::eh_sjlj_lsda: {
3713  MachineFunction &MF = DAG.getMachineFunction();
3714  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3715  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3716  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3717  SDValue CPAddr;
3718  bool IsPositionIndependent = isPositionIndependent();
3719  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3720  ARMConstantPoolValue *CPV =
3721  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3722  ARMCP::CPLSDA, PCAdj);
3723  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3724  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3725  SDValue Result = DAG.getLoad(
3726  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3727  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3728 
3729  if (IsPositionIndependent) {
3730  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3731  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3732  }
3733  return Result;
3734  }
3735  case Intrinsic::arm_neon_vabs:
3736  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3737  Op.getOperand(1));
3738  case Intrinsic::arm_neon_vmulls:
3739  case Intrinsic::arm_neon_vmullu: {
3740  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3741  ? ARMISD::VMULLs : ARMISD::VMULLu;
3742  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3743  Op.getOperand(1), Op.getOperand(2));
3744  }
3745  case Intrinsic::arm_neon_vminnm:
3746  case Intrinsic::arm_neon_vmaxnm: {
3747  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3748  ? ISD::FMINNUM : ISD::FMAXNUM;
3749  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3750  Op.getOperand(1), Op.getOperand(2));
3751  }
3752  case Intrinsic::arm_neon_vminu:
3753  case Intrinsic::arm_neon_vmaxu: {
3754  if (Op.getValueType().isFloatingPoint())
3755  return SDValue();
3756  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3757  ? ISD::UMIN : ISD::UMAX;
3758  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3759  Op.getOperand(1), Op.getOperand(2));
3760  }
3761  case Intrinsic::arm_neon_vmins:
3762  case Intrinsic::arm_neon_vmaxs: {
3763  // v{min,max}s is overloaded between signed integers and floats.
3764  if (!Op.getValueType().isFloatingPoint()) {
3765  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3766  ? ISD::SMIN : ISD::SMAX;
3767  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3768  Op.getOperand(1), Op.getOperand(2));
3769  }
3770  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3772  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3773  Op.getOperand(1), Op.getOperand(2));
3774  }
3775  case Intrinsic::arm_neon_vtbl1:
3776  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3777  Op.getOperand(1), Op.getOperand(2));
3778  case Intrinsic::arm_neon_vtbl2:
3779  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3780  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3781  case Intrinsic::arm_mve_pred_i2v:
3782  case Intrinsic::arm_mve_pred_v2i:
3783  return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
3784  Op.getOperand(1));
3785  }
3786 }
3787 
3788 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3789  const ARMSubtarget *Subtarget) {
3790  SDLoc dl(Op);
3791  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3792  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3793  if (SSID == SyncScope::SingleThread)
3794  return Op;
3795 
3796  if (!Subtarget->hasDataBarrier()) {
3797  // Some ARMv6 CPUs can support data barriers with an mcr instruction.
3798  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3799  // here.
3800  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3801  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3802  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3803  DAG.getConstant(0, dl, MVT::i32));
3804  }
3805 
3806  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3807  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3808  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3809  if (Subtarget->isMClass()) {
3810  // Only a full system barrier exists in the M-class architectures.
3811  Domain = ARM_MB::SY;
3812  } else if (Subtarget->preferISHSTBarriers() &&
3813  Ord == AtomicOrdering::Release) {
3814  // Swift happens to implement ISHST barriers in a way that's compatible with
3815  // Release semantics but weaker than ISH so we'd be fools not to use
3816  // it. Beware: other processors probably don't!
3817  Domain = ARM_MB::ISHST;
3818  }
3819 
3820  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3821  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3822  DAG.getConstant(Domain, dl, MVT::i32));
3823 }
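// For instance, the INTRINSIC_VOID node built above typically selects to
// "dmb ish" on A/R-class cores, "dmb ishst" for a release fence on Swift,
// and "dmb sy" on M-class, matching the Domain chosen above.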
3824 
3825 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3826  const ARMSubtarget *Subtarget) {
3827  // ARM pre-v5TE and Thumb1 do not have preload instructions.
3828  if (!(Subtarget->isThumb2() ||
3829  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3830  // Just preserve the chain.
3831  return Op.getOperand(0);
3832 
3833  SDLoc dl(Op);
3834  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3835  if (!isRead &&
3836  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3837  // ARMv7 with MP extension has PLDW.
3838  return Op.getOperand(0);
3839 
3840  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3841  if (Subtarget->isThumb()) {
3842  // Invert the bits.
3843  isRead = ~isRead & 1;
3844  isData = ~isData & 1;
3845  }
3846 
3847  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3848  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3849  DAG.getConstant(isData, dl, MVT::i32));
3850 }
3851 
3852 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3853  MachineFunction &MF = DAG.getMachineFunction();
3854  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3855 
3856  // vastart just stores the address of the VarArgsFrameIndex slot into the
3857  // memory location argument.
3858  SDLoc dl(Op);
3859  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3860  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3861  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3862  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3863  MachinePointerInfo(SV));
3864 }
3865 
3866 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3867  CCValAssign &NextVA,
3868  SDValue &Root,
3869  SelectionDAG &DAG,
3870  const SDLoc &dl) const {
3871  MachineFunction &MF = DAG.getMachineFunction();
3872  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3873 
3874  const TargetRegisterClass *RC;
3875  if (AFI->isThumb1OnlyFunction())
3876  RC = &ARM::tGPRRegClass;
3877  else
3878  RC = &ARM::GPRRegClass;
3879 
3880  // Transform the arguments stored in physical registers into virtual ones.
3881  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3882  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3883 
3884  SDValue ArgValue2;
3885  if (NextVA.isMemLoc()) {
3886  MachineFrameInfo &MFI = MF.getFrameInfo();
3887  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3888 
3889  // Create load node to retrieve arguments from the stack.
3890  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3891  ArgValue2 = DAG.getLoad(
3892  MVT::i32, dl, Root, FIN,
3893  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3894  } else {
3895  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3896  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3897  }
3898  if (!Subtarget->isLittle())
3899  std::swap (ArgValue, ArgValue2);
3900  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3901 }
3902 
3903 // The remaining GPRs hold either the beginning of variable-argument
3904 // data, or the beginning of an aggregate passed by value (usually
3905 // byval). Either way, we allocate stack slots adjacent to the data
3906 // provided by our caller, and store the unallocated registers there.
3907 // If this is a variadic function, the va_list pointer will begin with
3908 // these values; otherwise, this reassembles a (byval) structure that
3909 // was split between registers and memory.
3910 // Return: the frame index that the registers were stored into.
3911 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3912  const SDLoc &dl, SDValue &Chain,
3913  const Value *OrigArg,
3914  unsigned InRegsParamRecordIdx,
3915  int ArgOffset, unsigned ArgSize) const {
3916  // Currently, two use-cases are possible:
3917  // Case #1. Non-var-args function, and we meet the first byval parameter.
3918  // Set up the first unallocated register as the first byval register;
3919  // eat all remaining registers
3920  // (these two actions are performed by the HandleByVal method).
3921  // Then, here, we initialize the stack frame with
3922  // "store-reg" instructions.
3923  // Case #2. Var-args function that doesn't contain byval parameters.
3924  // The same: eat all remaining unallocated registers and
3925  // initialize the stack frame.
3926 
3927  MachineFunction &MF = DAG.getMachineFunction();
3928  MachineFrameInfo &MFI = MF.getFrameInfo();
3929  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3930  unsigned RBegin, REnd;
3931  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3932  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3933  } else {
3934  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3935  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3936  REnd = ARM::R4;
3937  }
3938 
3939  if (REnd != RBegin)
3940  ArgOffset = -4 * (ARM::R4 - RBegin);
3941 
3942  auto PtrVT = getPointerTy(DAG.getDataLayout());
3943  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3944  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3945 
3946  SmallVector<SDValue, 4> MemOps;
3947  const TargetRegisterClass *RC =
3948  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3949 
3950  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3951  unsigned VReg = MF.addLiveIn(Reg, RC);
3952  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3953  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3954  MachinePointerInfo(OrigArg, 4 * i));
3955  MemOps.push_back(Store);
3956  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3957  }
3958 
3959  if (!MemOps.empty())
3960  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3961  return FrameIndex;
3962 }
3963 
3964 // Set up the stack frame that the va_list pointer will start from.
3965 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3966  const SDLoc &dl, SDValue &Chain,
3967  unsigned ArgOffset,
3968  unsigned TotalArgRegsSaveSize,
3969  bool ForceMutable) const {
3970  MachineFunction &MF = DAG.getMachineFunction();
3971  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3972 
3973  // Try to store any remaining integer argument regs
3974  // to their spots on the stack so that they may be loaded by dereferencing
3975  // the result of va_next.
3976  // If there are no regs to be stored, just point the address after the
3977  // last argument passed via the stack.
3978  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3979  CCInfo.getInRegsParamsCount(),
3980  CCInfo.getNextStackOffset(),
3981  std::max(4U, TotalArgRegsSaveSize));
3982  AFI->setVarArgsFrameIndex(FrameIndex);
3983 }
3984 
3985 SDValue ARMTargetLowering::LowerFormalArguments(
3986  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3987  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3988  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3989  MachineFunction &MF = DAG.getMachineFunction();
3990  MachineFrameInfo &MFI = MF.getFrameInfo();
3991 
3992  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3993 
3994  // Assign locations to all of the incoming arguments.
3995  SmallVector<CCValAssign, 16> ArgLocs;
3996  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3997  *DAG.getContext());
3998  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3999 
4000  SmallVector<SDValue, 16> ArgValues;
4001  SDValue ArgValue;
4002  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4003  unsigned CurArgIdx = 0;
4004 
4005  // Initially ArgRegsSaveSize is zero.
4006  // Then we increase this value each time we meet byval parameter.
4007  // We also increase this value in case of varargs function.
4008  AFI->setArgRegsSaveSize(0);
4009 
4010  // Calculate the amount of stack space that we need to allocate to store
4011  // byval and variadic arguments that are passed in registers.
4012  // We need to know this before we allocate the first byval or variadic
4013  // argument, as they will be allocated a stack slot below the CFA (Canonical
4014  // Frame Address, the stack pointer at entry to the function).
4015  unsigned ArgRegBegin = ARM::R4;
4016  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4017  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4018  break;
4019 
4020  CCValAssign &VA = ArgLocs[i];
4021  unsigned Index = VA.getValNo();
4022  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4023  if (!Flags.isByVal())
4024  continue;
4025 
4026  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4027  unsigned RBegin, REnd;
4028  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4029  ArgRegBegin = std::min(ArgRegBegin, RBegin);
4030 
4031  CCInfo.nextInRegsParam();
4032  }
4033  CCInfo.rewindByValRegsInfo();
4034 
4035  int lastInsIndex = -1;
4036  if (isVarArg && MFI.hasVAStart()) {
4037  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4038  if (RegIdx != array_lengthof(GPRArgRegs))
4039  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4040  }
4041 
4042  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4043  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4044  auto PtrVT = getPointerTy(DAG.getDataLayout());
4045 
4046  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4047  CCValAssign &VA = ArgLocs[i];
4048  if (Ins[VA.getValNo()].isOrigArg()) {
4049  std::advance(CurOrigArg,
4050  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4051  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4052  }
4053  // Arguments stored in registers.
4054  if (VA.isRegLoc()) {
4055  EVT RegVT = VA.getLocVT();
4056 
4057  if (VA.needsCustom()) {
4058  // f64 and vector types are split up into multiple registers or
4059  // combinations of registers and stack slots.
4060  if (VA.getLocVT() == MVT::v2f64) {
4061  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
4062  Chain, DAG, dl);
4063  VA = ArgLocs[++i]; // skip ahead to next loc
4064  SDValue ArgValue2;
4065  if (VA.isMemLoc()) {
4066  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4067  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4068  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
4069  MachinePointerInfo::getFixedStack(
4070  DAG.getMachineFunction(), FI));
4071  } else {
4072  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
4073  Chain, DAG, dl);
4074  }
4075  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4076  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
4077  ArgValue, ArgValue1,
4078  DAG.getIntPtrConstant(0, dl));
4079  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
4080  ArgValue, ArgValue2,
4081  DAG.getIntPtrConstant(1, dl));
4082  } else
4083  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4084  } else {
4085  const TargetRegisterClass *RC;
4086 
4087 
4088  if (RegVT == MVT::f16)
4089  RC = &ARM::HPRRegClass;
4090  else if (RegVT == MVT::f32)
4091  RC = &ARM::SPRRegClass;
4092  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
4093  RC = &ARM::DPRRegClass;
4094  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
4095  RC = &ARM::QPRRegClass;
4096  else if (RegVT == MVT::i32)
4097  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4098  : &ARM::GPRRegClass;
4099  else
4100  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4101 
4102  // Transform the arguments in physical registers into virtual ones.
4103  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4104  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4105 
4106  // If this value is passed in r0 and has the returned attribute (e.g.
4107  // C++ 'structors), record this fact for later use.
4108  if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4109  AFI->setPreservesR0();
4110  }
4111  }
4112 
4113  // If this is an 8 or 16-bit value, it is really passed promoted
4114  // to 32 bits. Insert an assert[sz]ext to capture this, then
4115  // truncate to the right size.
4116  switch (VA.getLocInfo()) {
4117  default: llvm_unreachable("Unknown loc info!");
4118  case CCValAssign::Full: break;
4119  case CCValAssign::BCvt:
4120  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4121  break;
4122  case CCValAssign::SExt:
4123  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4124  DAG.getValueType(VA.getValVT()));
4125  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4126  break;
4127  case CCValAssign::ZExt:
4128  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4129  DAG.getValueType(VA.getValVT()));
4130  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4131  break;
4132  }
4133 
4134  InVals.push_back(ArgValue);
4135  } else { // !VA.isRegLoc()
4136  // sanity check
4137  assert(VA.isMemLoc());
4138  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4139 
4140  int index = VA.getValNo();
4141 
4142  // Some Ins[] entries become multiple ArgLoc[] entries.
4143  // Process them only once.
4144  if (index != lastInsIndex)
4145  {
4146  ISD::ArgFlagsTy Flags = Ins[index].Flags;
4147  // FIXME: For now, all byval parameter objects are marked mutable.
4148  // This can be changed with more analysis.
4149  // In case of tail call optimization mark all arguments mutable.
4150  // Since they could be overwritten by lowering of arguments in case of
4151  // a tail call.
4152  if (Flags.isByVal()) {
4153  assert(Ins[index].isOrigArg() &&
4154  "Byval arguments cannot be implicit");
4155  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4156 
4157  int FrameIndex = StoreByValRegs(
4158  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4159  VA.getLocMemOffset(), Flags.getByValSize());
4160  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4161  CCInfo.nextInRegsParam();
4162  } else {
4163  unsigned FIOffset = VA.getLocMemOffset();
4164  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4165  FIOffset, true);
4166 
4167  // Create load nodes to retrieve arguments from the stack.
4168  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4169  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4170  MachinePointerInfo::getFixedStack(
4171  DAG.getMachineFunction(), FI)));
4172  }
4173  lastInsIndex = index;
4174  }
4175  }
4176  }
4177 
4178  // varargs
4179  if (isVarArg && MFI.hasVAStart())
4180  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
4181  CCInfo.getNextStackOffset(),
4182  TotalArgRegsSaveSize);
4183 
4184  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
4185 
4186  return Chain;
4187 }
4188 
4189 /// isFloatingPointZero - Return true if this is +0.0.
4190 static bool isFloatingPointZero(SDValue Op) {
4191  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4192  return CFP->getValueAPF().isPosZero();
4193  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4194  // Maybe this has already been legalized into the constant pool?
4195  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4196  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4197  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4198  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4199  return CFP->getValueAPF().isPosZero();
4200  }
4201  } else if (Op->getOpcode() == ISD::BITCAST &&
4202  Op->getValueType(0) == MVT::f64) {
4203  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4204  // created by LowerConstantFP().
4205  SDValue BitcastOp = Op->getOperand(0);
4206  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4207  isNullConstant(BitcastOp->getOperand(0)))
4208  return true;
4209  }
4210  return false;
4211 }
4212 
4213 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4214 /// the given operands.
4215 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4216  SDValue &ARMcc, SelectionDAG &DAG,
4217  const SDLoc &dl) const {
4218  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4219  unsigned C = RHSC->getZExtValue();
4220  if (!isLegalICmpImmediate((int32_t)C)) {
4221  // Constant does not fit, try adjusting it by one.
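// E.g. on Thumb1 "x < 256" (SETULT) becomes "x <= 255" (SETULE), whose
// immediate fits the 8-bit field of cmp.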
4222  switch (CC) {
4223  default: break;
4224  case ISD::SETLT:
4225  case ISD::SETGE:
4226  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4227  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4228  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4229  }
4230  break;
4231  case ISD::SETULT:
4232  case ISD::SETUGE:
4233  if (C != 0 && isLegalICmpImmediate(C-1)) {
4234  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4235  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4236  }
4237  break;
4238  case ISD::SETLE:
4239  case ISD::SETGT:
4240  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4241  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4242  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4243  }
4244  break;
4245  case ISD::SETULE:
4246  case ISD::SETUGT:
4247  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4248  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4249  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4250  }
4251  break;
4252  }
4253  }
4254  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4255  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4256  // In ARM and Thumb-2, the compare instructions can shift their second
4257  // operand.
4258  CC = ISD::getSetCCSwappedOperands(CC);
4259  std::swap(LHS, RHS);
4260  }
4261 
4262  // Thumb1 has very limited immediate modes, so turning an "and" into a
4263  // shift can save multiple instructions.
4264  //
4265  // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4266  // into "((x << n) >> n)". But that isn't necessarily profitable on its
4267  // own. If it's the operand to an unsigned comparison with an immediate,
4268  // we can eliminate one of the shifts: we transform
4269  // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4270  //
4271  // We avoid transforming cases which aren't profitable due to encoding
4272  // details:
4273  //
4274  // 1. C2 fits into the immediate field of a cmp, and the transformed version
4275  // would not; in that case, we're essentially trading one immediate load for
4276  // another.
4277  // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4278  // 3. C2 is zero; we have other code for this special case.
4279  //
4280  // FIXME: Figure out profitability for Thumb2; we usually can't save an
4281  // instruction, since the AND is always one instruction anyway, but we could
4282  // use narrow instructions in some cases.
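// Worked example of the profitable case: "(x & 0x1ffffff) == 1" has
// Mask = 0x1ffffff, so ShiftBits = 7, and since 1 << 7 = 128 still fits a
// cmp immediate this becomes "lsls r0, r0, #7; cmp r0, #128" instead of
// materializing the 25-bit mask constant.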
4283  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4284  LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4285  LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4286  !isSignedIntSetCC(CC)) {
4287  unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
4288  auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4289  uint64_t RHSV = RHSC->getZExtValue();
4290  if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4291  unsigned ShiftBits = countLeadingZeros(Mask);
4292  if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4293  SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4294  LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4295  RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4296  }
4297  }
4298  }
4299 
4300  // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4301  // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4302  // way a cmp would.
4303  // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4304  // some tweaks to the heuristics for the previous and->shift transform.
4305  // FIXME: Optimize cases where the LHS isn't a shift.
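// E.g. "(x << 2) > 0x80000000U" becomes a single "lsls rN, rN, #3"; the HI
// condition used below then reads the C and Z flags the shift just set.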
4306  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4307  isa<ConstantSDNode>(RHS) &&
4308  cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
4309  CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4310  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
4311  unsigned ShiftAmt =
4312  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
4313  SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4314  DAG.getVTList(MVT::i32, MVT::i32),
4315  LHS.getOperand(0),
4316  DAG.getConstant(ShiftAmt, dl, MVT::i32));
4317  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4318  Shift.getValue(1), SDValue());
4319  ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4320  return Chain.getValue(1);
4321  }
4322 
4323  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4324 
4325  // If the RHS is a constant zero then the V (overflow) flag will never be
4326  // set. This can allow us to simplify GE to PL or LT to MI, which can be
4327  // simpler for other passes (like the peephole optimiser) to deal with.
4328  if (isNullConstant(RHS)) {
4329  switch (CondCode) {
4330  default: break;
4331  case ARMCC::GE:
4332  CondCode = ARMCC::PL;
4333  break;
4334  case ARMCC::LT:
4335  CondCode = ARMCC::MI;
4336  break;
4337  }
4338  }
4339 
4340  ARMISD::NodeType CompareType;
4341  switch (CondCode) {
4342  default:
4343  CompareType = ARMISD::CMP;
4344  break;
4345  case ARMCC::EQ:
4346  case ARMCC::NE:
4347  // Uses only Z Flag
4348  CompareType = ARMISD::CMPZ;
4349  break;
4350  }
4351  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4352  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4353 }
4354 
4355 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4356 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4357  SelectionDAG &DAG, const SDLoc &dl,
4358  bool Signaling) const {
4359  assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4360  SDValue Cmp;
4361  if (!isFloatingPointZero(RHS))
4362  Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4363  dl, MVT::Glue, LHS, RHS);
4364  else
4365  Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4366  dl, MVT::Glue, LHS);
4367  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4368 }
4369 
4370 /// duplicateCmp - Glue values can have only one use, so this function
4371 /// duplicates a comparison node.
4372 SDValue
4373 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4374  unsigned Opc = Cmp.getOpcode();
4375  SDLoc DL(Cmp);
4376  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4377  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4378 
4379  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4380  Cmp = Cmp.getOperand(0);
4381  Opc = Cmp.getOpcode();
4382  if (Opc == ARMISD::CMPFP)
4383  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4384  else {
4385  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4386  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4387  }
4388  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4389 }
4390 
4391 // This function returns three things: the arithmetic computation itself
4392 // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4393 // comparison and the condition code define the case in which the arithmetic
4394 // computation *does not* overflow.
4395 std::pair<SDValue, SDValue>
4396 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4397  SDValue &ARMcc) const {
4398  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4399 
4400  SDValue Value, OverflowCmp;
4401  SDValue LHS = Op.getOperand(0);
4402  SDValue RHS = Op.getOperand(1);
4403  SDLoc dl(Op);
4404 
4405  // FIXME: We are currently always generating CMPs because we don't support
4406  // generating CMN through the backend. This is not as good as the natural
4407  // CMP case because it causes a register dependency and cannot be folded
4408  // later.
4409 
4410  switch (Op.getOpcode()) {
4411  default:
4412  llvm_unreachable("Unknown overflow instruction!");
4413  case ISD::SADDO:
4414  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4415  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4416  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4417  break;
4418  case ISD::UADDO:
4419  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4420  // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4421  // We do not use it in the USUBO case as Value may not be used.
4422  Value = DAG.getNode(ARMISD::ADDC, dl,
4423  DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4424  .getValue(0);
4425  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4426  break;
4427  case ISD::SSUBO:
4428  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4429  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4430  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4431  break;
4432  case ISD::USUBO:
4433  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4434  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4435  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4436  break;
4437  case ISD::UMULO:
4438  // We generate a UMUL_LOHI and then check if the high word is 0.
4439  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4440  Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4441  DAG.getVTList(Op.getValueType(), Op.getValueType()),
4442  LHS, RHS);
4443  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4444  DAG.getConstant(0, dl, MVT::i32));
4445  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4446  break;
4447  case ISD::SMULO:
4448  // We generate a SMUL_LOHI and then check if all the bits of the high word
4449  // are the same as the sign bit of the low word.
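// E.g. 65536 * 65536 = 2^32 yields high word 1 and low word 0; 1 is not
// the sign-extension of 0, so the comparison below flags an overflow.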
4450  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4451  Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4452  DAG.getVTList(Op.getValueType(), Op.getValueType()),
4453  LHS, RHS);
4454  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4455  DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4456  Value.getValue(0),
4457  DAG.getConstant(31, dl, MVT::i32)));
4458  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4459  break;
4460  } // switch (...)
4461 
4462  return std::make_pair(Value, OverflowCmp);
4463 }
4464 
4465 SDValue
4466 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4467  // Let legalize expand this if it isn't a legal type yet.
4468  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4469  return SDValue();
4470 
4471  SDValue Value, OverflowCmp;
4472  SDValue ARMcc;
4473  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4474  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4475  SDLoc dl(Op);
4476  // We use 0 and 1 as false and true values.
4477  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4478  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4479  EVT VT = Op.getValueType();
4480 
4481  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4482  ARMcc, CCR, OverflowCmp);
4483 
4484  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4485  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4486 }
4487 
4488 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4489  SelectionDAG &DAG) {
4490  SDLoc DL(BoolCarry);
4491  EVT CarryVT = BoolCarry.getValueType();
4492 
4493  // This converts the boolean value carry into the carry flag by doing
4494  // ARMISD::SUBC Carry, 1
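// (This works because ARM subtraction sets C when there is *no* borrow:
// 1 - 1 leaves C = 1, while 0 - 1 borrows and leaves C = 0, so the carry
// flag ends up equal to BoolCarry.)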
4495  SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4496  DAG.getVTList(CarryVT, MVT::i32),
4497  BoolCarry, DAG.getConstant(1, DL, CarryVT));
4498  return Carry.getValue(1);
4499 }
4500 
4501 static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4502  SelectionDAG &DAG) {
4503  SDLoc DL(Flags);
4504 
4505  // Now convert the carry flag into a boolean carry. We do this
4506  // using ARMISD::ADDE 0, 0, Carry
4507  return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4508  DAG.getConstant(0, DL, MVT::i32),
4509  DAG.getConstant(0, DL, MVT::i32), Flags);
4510 }
4511 
4512 SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4513  SelectionDAG &DAG) const {
4514  // Let legalize expand this if it isn't a legal type yet.
4515  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4516  return SDValue();
4517 
4518  SDValue LHS = Op.getOperand(0);
4519  SDValue RHS = Op.getOperand(1);
4520  SDLoc dl(Op);
4521 
4522  EVT VT = Op.getValueType();
4523  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4524  SDValue Value;
4525  SDValue Overflow;
4526  switch (Op.getOpcode()) {
4527  default:
4528  llvm_unreachable("Unknown overflow instruction!");
4529  case ISD::UADDO:
4530  Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4531  // Convert the carry flag into a boolean value.
4532  Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4533  break;
4534  case ISD::USUBO: {
4535  Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4536  // Convert the carry flag into a boolean value.
4537  Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4538  // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
4539  // value. So compute 1 - C.
4540  Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4541  DAG.getConstant(1, dl, MVT::i32), Overflow);
4542  break;
4543  }
4544  }
4545 
4546  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4547 }
4548 
4549 static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
4550  const ARMSubtarget *Subtarget) {
4551  EVT VT = Op.getValueType();
4552  if (!Subtarget->hasDSP())
4553  return SDValue();
4554  if (!VT.isSimple())
4555  return SDValue();
4556 
4557  unsigned NewOpcode;
4558  bool IsAdd = Op->getOpcode() == ISD::SADDSAT;
4559  switch (VT.getSimpleVT().SimpleTy) {
4560  default:
4561  return SDValue();
4562  case MVT::i8:
4563  NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b;
4564  break;
4565  case MVT::i16:
4566  NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b;
4567  break;
4568  }
4569 
4570  SDLoc dl(Op);
4571  SDValue Add =
4572  DAG.getNode(NewOpcode, dl, MVT::i32,
4573  DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
4574  DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
4575  return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
4576 }
4577 
4578 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
4579  SDValue Cond = Op.getOperand(0);
4580  SDValue SelectTrue = Op.getOperand(1);
4581  SDValue SelectFalse = Op.getOperand(2);
4582  SDLoc dl(Op);
4583  unsigned Opc = Cond.getOpcode();
4584 
4585  if (Cond.getResNo() == 1 &&
4586  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4587  Opc == ISD::USUBO)) {
4588  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4589  return SDValue();
4590 
4591  SDValue Value, OverflowCmp;
4592  SDValue ARMcc;
4593  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4594  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4595  EVT VT = Op.getValueType();
4596 
4597  return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
4598  OverflowCmp, DAG);
4599  }
4600 
4601  // Convert:
4602  //
4603  // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4604  // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4605  //
4606  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4607  const ConstantSDNode *CMOVTrue =
4608  dyn_cast<ConstantSDNode>(Cond.getOperand(0).getNode());
4609  const ConstantSDNode *CMOVFalse =
4610  dyn_cast<ConstantSDNode>(Cond.getOperand(1).getNode());
4611 
4612  if (CMOVTrue && CMOVFalse) {
4613  unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4614  unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4615 
4616  SDValue True;
4617  SDValue False;
4618  if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4619  True = SelectTrue;
4620  False = SelectFalse;
4621  } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4622  True = SelectFalse;
4623  False = SelectTrue;
4624  }
4625 
4626  if (True.getNode() && False.getNode()) {
4627  EVT VT = Op.getValueType();
4628  SDValue ARMcc = Cond.getOperand(2);
4629  SDValue CCR = Cond.getOperand(3);
4630  SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4631  assert(True.getValueType() == VT);
4632  return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4633  }
4634  }
4635  }
4636 
4637  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4638  // undefined bits before doing a full-word comparison with zero.
4639  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4640  DAG.getConstant(1, dl, Cond.getValueType()));
4641 
4642  return DAG.getSelectCC(dl, Cond,
4643  DAG.getConstant(0, dl, Cond.getValueType()),
4644  SelectTrue, SelectFalse, ISD::SETNE);
4645 }
4646 
4647 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4648  bool &swpCmpOps, bool &swpVselOps) {
4649  // Start by selecting the GE condition code for opcodes that return true for
4650  // 'equality'
4651  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4652  CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
4653  CondCode = ARMCC::GE;
4654 
4655  // and GT for opcodes that return false for 'equality'.
4656  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4657  CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
4658  CondCode = ARMCC::GT;
4659 
4660  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4661  // to swap the compare operands.
4662  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4663  CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
4664  swpCmpOps = true;
4665 
4666  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4667  // If we have an unordered opcode, we need to swap the operands to the VSEL
4668  // instruction (effectively negating the condition).
4669  //
4670  // This also has the effect of swapping which one of 'less' or 'greater'
4671  // returns true, so we also swap the compare operands. It also switches
4672  // whether we return true for 'equality', so we compensate by picking the
4673  // opposite condition code to our original choice.
4674  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4675  CC == ISD::SETUGT) {
4676  swpCmpOps = !swpCmpOps;
4677  swpVselOps = !swpVselOps;
4678  CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4679  }
4680 
4681  // 'ordered' is 'anything but unordered', so use the VS condition code and
4682  // swap the VSEL operands.
4683  if (CC == ISD::SETO) {
4684  CondCode = ARMCC::VS;
4685  swpVselOps = true;
4686  }
4687 
4688  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4689  // code and swap the VSEL operands. Also do this if we don't care about the
4690  // unordered case.
4691  if (CC == ISD::SETUNE || CC == ISD::SETNE) {
4692  CondCode = ARMCC::EQ;
4693  swpVselOps = true;
4694  }
4695 }
4696 
4697 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4698  SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4699  SDValue Cmp, SelectionDAG &DAG) const {
4700  if (!Subtarget->hasFP64() && VT == MVT::f64) {
4701  FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4702  DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4703  TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4704  DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4705 
4706  SDValue TrueLow = TrueVal.getValue(0);
4707  SDValue TrueHigh = TrueVal.getValue(1);
4708  SDValue FalseLow = FalseVal.getValue(0);
4709  SDValue FalseHigh = FalseVal.getValue(1);
4710 
4711  SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4712  ARMcc, CCR, Cmp);
4713  SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4714  ARMcc, CCR, duplicateCmp(Cmp, DAG));
4715 
4716  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4717  } else {
4718  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4719  Cmp);
4720  }
4721 }
4722 
4723 static bool isGTorGE(ISD::CondCode CC) {
4724  return CC == ISD::SETGT || CC == ISD::SETGE;
4725 }
4726 
4727 static bool isLTorLE(ISD::CondCode CC) {
4728  return CC == ISD::SETLT || CC == ISD::SETLE;
4729 }
4730 
4731 // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4732 // All of these conditions (and their <= and >= counterparts) will do:
4733 // x < k ? k : x
4734 // x > k ? x : k
4735 // k < x ? x : k
4736 // k > x ? k : x
4737 static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4738  const SDValue TrueVal, const SDValue FalseVal,
4739  const ISD::CondCode CC, const SDValue K) {
4740  return (isGTorGE(CC) &&
4741  ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4742  (isLTorLE(CC) &&
4743  ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4744 }
4745 
4746 // Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4747 static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4748  const SDValue TrueVal, const SDValue FalseVal,
4749  const ISD::CondCode CC, const SDValue K) {
4750  return (isGTorGE(CC) &&
4751  ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4752  (isLTorLE(CC) &&
4753  ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4754 }
4755 
4756 // Check if two chained conditionals could be converted into SSAT or USAT.
4757 //
4758 // SSAT can replace a set of two conditional selectors that bound a number to an
4759 // interval of the form [~k, k] when k + 1 is a power of 2. Here are some examples:
4760 //
4761 // x < -k ? -k : (x > k ? k : x)
4762 // x < -k ? -k : (x < k ? x : k)
4763 // x > -k ? (x > k ? k : x) : -k
4764 // x < k ? (x < -k ? -k : x) : k
4765 // etc.
4766 //
4767 // USAT works similarly to SSAT but bounds the value to the interval [0, k] where k + 1 is
4768 // a power of 2.
4769 //
4770 // It returns true if the conversion can be done, false otherwise.
4771 // Additionally, the variable is returned in parameter V, the constant in K, and
4772 // usat is set to true if the conditional represents an unsigned saturation.
4773 static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4774  uint64_t &K, bool &usat) {
4775  SDValue LHS1 = Op.getOperand(0);
4776  SDValue RHS1 = Op.getOperand(1);
4777  SDValue TrueVal1 = Op.getOperand(2);
4778  SDValue FalseVal1 = Op.getOperand(3);
4779  ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4780 
4781  const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4782  if (Op2.getOpcode() != ISD::SELECT_CC)
4783  return false;
4784 
4785  SDValue LHS2 = Op2.getOperand(0);
4786  SDValue RHS2 = Op2.getOperand(1);
4787  SDValue TrueVal2 = Op2.getOperand(2);
4788  SDValue FalseVal2 = Op2.getOperand(3);
4789  ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4790 
4791  // Find out which are the constants and which are the variables
4792  // in each conditional
4793  SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4794  ? &RHS1
4795  : nullptr;
4796  SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4797  ? &RHS2
4798  : nullptr;
4799  SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4800  SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4801  SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4802  SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4803 
4804  // We must detect cases where the original operations worked with 16- or
4805  // 8-bit values. In such a case, V2Tmp != V2 because the comparison operations
4806  // must work with sign-extended values but the select operations return
4807  // the original non-extended value.
4808  SDValue V2TmpReg = V2Tmp;
4809  if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4810  V2TmpReg = V2Tmp->getOperand(0);
4811 
4812  // Check that the registers and the constants have the correct values
4813  // in both conditionals
4814  if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4815  V2TmpReg != V2)
4816  return false;
4817 
4818  // Figure out which conditional is saturating the lower/upper bound.
4819  const SDValue *LowerCheckOp =
4820  isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4821  ? &Op
4822  : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4823  ? &Op2
4824  : nullptr;
4825  const SDValue *UpperCheckOp =
4826  isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4827  ? &Op
4828  : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4829  ? &Op2
4830  : nullptr;
4831 
4832  if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4833  return false;
4834 
4835  // Check that the constant in the lower-bound check is
4836  // the opposite of the constant in the upper-bound check
4837  // in 1's complement.
4838  int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4839  int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4840  int64_t PosVal = std::max(Val1, Val2);
4841  int64_t NegVal = std::min(Val1, Val2);
4842 
4843  if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4844  (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4845  isPowerOf2_64(PosVal + 1)) {
4846 
4847  // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
4848  if (Val1 == ~Val2)
4849  usat = false;
4850  else if (NegVal == 0)
4851  usat = true;
4852  else
4853  return false;
4854 
4855  V = V2;
4856  K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4857 
4858  return true;
4859  }
4860 
4861  return false;
4862 }
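// For illustration, the source-level shape this matcher recognizes. A minimal
// sketch with hypothetical helper names (clamp_s8/clamp_u8 are not part of
// this file); both clamps satisfy the power-of-two bound check, so each pair
// of selects collapses into a single saturating instruction:
//
//   int clamp_s8(int x) { // bounds [-128, 127]: 127 + 1 is 2^7, -128 == ~127
//     return x < -128 ? -128 : (x > 127 ? 127 : x); // signed saturation
//   }
//   int clamp_u8(int x) { // bounds [0, 255]: NegVal == 0, PosVal + 1 is 2^8
//     return x < 0 ? 0 : (x > 255 ? 255 : x); // unsigned saturation
//   }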
4863 
4864 // Check if a condition of the type x < k ? k : x can be converted into a
4865 // bit operation instead of conditional moves.
4866 // Currently this is allowed given:
4867 // - The conditions and values match up
4868 // - k is 0 or -1 (all ones)
4869 // This function will not check the last condition; that's up to the caller.
4870 // It returns true if the transformation can be made, and in such case
4871 // returns x in V, and k in SatK.
4872 static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
4873  SDValue &SatK)
4874 {
4875  SDValue LHS = Op.getOperand(0);
4876  SDValue RHS = Op.getOperand(1);
4877  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4878  SDValue TrueVal = Op.getOperand(2);
4879  SDValue FalseVal = Op.getOperand(3);
4880 
4881  SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
4882  ? &RHS
4883  : nullptr;
4884 
4885  // No constant operand in the comparison, early out
4886  if (!K)
4887  return false;
4888 
4889  SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
4890  V = (KTmp == TrueVal) ? FalseVal : TrueVal;
4891  SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
4892 
4893  // If the constant in the compare does not match the constant in the select,
4894  // or the variable does not match on both sides, early out
4895  if (*K != KTmp || V != VTmp)
4896  return false;
4897 
4898  if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
4899  SatK = *K;
4900  return true;
4901  }
4902 
4903  return false;
4904 }
4905 
4906 bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
4907  if (VT == MVT::f32)
4908  return !Subtarget->hasVFP2Base();
4909  if (VT == MVT::f64)
4910  return !Subtarget->hasFP64();
4911  if (VT == MVT::f16)
4912  return !Subtarget->hasFullFP16();
4913  return false;
4914 }
4915 
4916 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4917  EVT VT = Op.getValueType();
4918  SDLoc dl(Op);
4919 
4920  // Try to convert two saturating conditional selects into a single SSAT
4921  SDValue SatValue;
4922  uint64_t SatConstant;
4923  bool SatUSat;
4924  if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4925  isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {
4926  if (SatUSat)
4927  return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,
4928  DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4929  else
4930  return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4931  DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4932  }
4933 
4934  // Try to convert expressions of the form x < k ? k : x (and similar forms)
4935  // into more efficient bit operations, which is possible when k is 0 or -1
4936  // On ARM and Thumb-2, which have a flexible second operand, this results in
4937  // a single instruction. On Thumb the shift and the bit operation will be two
4938  // instructions.
4939  // Only allow this transformation on full-width (32-bit) operations
4940  SDValue LowerSatConstant;
4941  if (VT == MVT::i32 &&
4942  isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
4943  SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
4944  DAG.getConstant(31, dl, VT));
4945  if (isNullConstant(LowerSatConstant)) {
4946  SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
4947  DAG.getAllOnesConstant(dl, VT));
4948  return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
4949  } else if (isAllOnesConstant(LowerSatConstant))
4950  return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
4951  }
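  // For illustration (restating the transform above): for k == 0,
  // 'x < 0 ? 0 : x' becomes 'x & ~(x >> 31)'; the arithmetic shift smears the
  // sign bit across the word, so negative x is masked to zero and
  // non-negative x passes through unchanged. For k == -1, 'x > -1 ? x : -1'
  // becomes 'x | (x >> 31)', which forces all ones exactly when x is negative.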
4952 
4953  SDValue LHS = Op.getOperand(0);
4954  SDValue RHS = Op.getOperand(1);
4955  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4956  SDValue TrueVal = Op.getOperand(2);
4957  SDValue FalseVal = Op.getOperand(3);
4958  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
4959  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
4960 
4961  if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
4962  LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
4963  unsigned TVal = CTVal->getZExtValue();
4964  unsigned FVal = CFVal->getZExtValue();
4965  unsigned Opcode = 0;
4966 
4967  if (TVal == ~FVal) {
4968  Opcode = ARMISD::CSINV;
4969  } else if (TVal == ~FVal + 1) {
4970  Opcode = ARMISD::CSNEG;
4971  } else if (TVal + 1 == FVal) {
4972  Opcode = ARMISD::CSINC;
4973  } else if (TVal == FVal + 1) {
4974  Opcode = ARMISD::CSINC;
4975  std::swap(TrueVal, FalseVal);
4976  std::swap(TVal, FVal);
4977  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
4978  }
4979 
4980  if (Opcode) {
4981  // If one of the constants is cheaper than another, materialise the
4982  // cheaper one and let the csel generate the other.
4983  if (Opcode != ARMISD::CSINC &&
4984  HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
4985  std::swap(TrueVal, FalseVal);
4986  std::swap(TVal, FVal);
4987  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
4988  }
4989 
4990  // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
4991  // condition to get there. CSINC is not invertible like the other two
4992  // (~(~a) == a, -(-a) == a, but (a+1)+1 != a).
4993  if (FVal == 0 && Opcode != ARMISD::CSINC) {
4994  std::swap(TrueVal, FalseVal);
4995  std::swap(TVal, FVal);
4996  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
4997  }
4998  if (TVal == 0)
4999  TrueVal = DAG.getRegister(ARM::ZR, MVT::i32);
5000 
5001  // Drops F's value because we can get it by inverting/negating TVal.
5002  FalseVal = TrueVal;
5003 
5004  SDValue ARMcc;
5005  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5006  EVT VT = TrueVal.getValueType();
5007  return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5008  }
5009  }
5010 
5011  if (isUnsupportedFloatingType(LHS.getValueType())) {
5012  DAG.getTargetLoweringInfo().softenSetCCOperands(
5013  DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5014 
5015  // If softenSetCCOperands only returned one value, we should compare it to
5016  // zero.
5017  if (!RHS.getNode()) {
5018  RHS = DAG.getConstant(0, dl, LHS.getValueType());
5019  CC = ISD::SETNE;
5020  }
5021  }
5022 
5023  if (LHS.getValueType() == MVT::i32) {
5024  // Try to generate VSEL on ARMv8.
5025  // The VSEL instruction can't use all the usual ARM condition
5026  // codes: it only has two bits to select the condition code, so it's
5027  // constrained to use only GE, GT, VS and EQ.
5028  //
5029  // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5030  // swap the operands of the previous compare instruction (effectively
5031  // inverting the compare condition, swapping 'less' and 'greater') and
5032  // sometimes need to swap the operands to the VSEL (which inverts the
5033  // condition in the sense of firing whenever the previous condition didn't)
5034  if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5035  TrueVal.getValueType() == MVT::f32 ||
5036  TrueVal.getValueType() == MVT::f64)) {
5037  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5038  if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5039  CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5040  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5041  std::swap(TrueVal, FalseVal);
5042  }
5043  }
5044 
5045  SDValue ARMcc;
5046  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5047  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5048  // Choose GE over PL, which vsel does not support
5049  if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
5050  ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5051  return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5052  }
5053 
5054  ARMCC::CondCodes CondCode, CondCode2;
5055  FPCCToARMCC(CC, CondCode, CondCode2);
5056 
5057  // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5058  // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5059  // must use VSEL (limited condition codes), due to not having conditional f16
5060  // moves.
5061  if (Subtarget->hasFPARMv8Base() &&
5062  !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5063  (TrueVal.getValueType() == MVT::f16 ||
5064  TrueVal.getValueType() == MVT::f32 ||
5065  TrueVal.getValueType() == MVT::f64)) {
5066  bool swpCmpOps = false;
5067  bool swpVselOps = false;
5068  checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5069 
5070  if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5071  CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5072  if (swpCmpOps)
5073  std::swap(LHS, RHS);
5074  if (swpVselOps)
5075  std::swap(TrueVal, FalseVal);
5076  }
5077  }
5078 
5079  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5080  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5081  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5082  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5083  if (CondCode2 != ARMCC::AL) {
5084  SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5085  // FIXME: Needs another CMP because flag can have but one use.
5086  SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5087  Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5088  }
5089  return Result;
5090 }
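// For illustration, the identities behind the opcode choices above: on
// v8.1-M Mainline, 'c ? 5 : 4' needs only the constant 4 materialized, since
// CSINC produces 4 + 1 for the other arm; likewise 'c ? ~m : m' maps to CSINV
// and 'c ? -m : m' to CSNEG, each deriving the second value from the first
// instead of materializing both constants.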
5091 
5092 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
5093 /// to morph to an integer compare sequence.
5094 static bool canChangeToInt(SDValue Op, bool &SeenZero,
5095  const ARMSubtarget *Subtarget) {
5096  SDNode *N = Op.getNode();
5097  if (!N->hasOneUse())
5098  // Otherwise it requires moving the value from fp to integer registers.
5099  return false;
5100  if (!N->getNumValues())
5101  return false;
5102  EVT VT = Op.getValueType();
5103  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5104  // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5105  // vmrs are very slow, e.g. cortex-a8.
5106  return false;
5107 
5108  if (isFloatingPointZero(Op)) {
5109  SeenZero = true;
5110  return true;
5111  }
5112  return ISD::isNormalLoad(N);
5113 }
5114 
5114 
5115 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5116  if (isFloatingPointZero(Op))
5117  return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5118 
5119  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5120  return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5121  Ld->getPointerInfo(), Ld->getAlignment(),
5122  Ld->getMemOperand()->getFlags());
5123 
5124  llvm_unreachable("Unknown VFP cmp argument!");
5125 }
5126 
5127 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5128  SDValue &RetVal1, SDValue &RetVal2) {
5129  SDLoc dl(Op);
5130 
5131  if (isFloatingPointZero(Op)) {
5132  RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5133  RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5134  return;
5135  }
5136 
5137  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5138  SDValue Ptr = Ld->getBasePtr();
5139  RetVal1 =
5140  DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5141  Ld->getAlignment(), Ld->getMemOperand()->getFlags());
5142 
5143  EVT PtrType = Ptr.getValueType();
5144  unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
5145  SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5146  PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5147  RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5148  Ld->getPointerInfo().getWithOffset(4), NewAlign,
5149  Ld->getMemOperand()->getFlags());
5150  return;
5151  }
5152 
5153  llvm_unreachable("Unknown VFP cmp argument!");
5154 }
5155 
5156 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5157 /// f32 and even f64 comparisons to integer ones.
5158 SDValue
5159 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5160  SDValue Chain = Op.getOperand(0);
5161  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5162  SDValue LHS = Op.getOperand(2);
5163  SDValue RHS = Op.getOperand(3);
5164  SDValue Dest = Op.getOperand(4);
5165  SDLoc dl(Op);
5166 
5167  bool LHSSeenZero = false;
5168  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5169  bool RHSSeenZero = false;
5170  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5171  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5172  // If unsafe fp math optimization is enabled and there are no other uses of
5173  // the CMP operands, and the condition code is EQ or NE, we can optimize it
5174  // to an integer comparison.
5175  if (CC == ISD::SETOEQ)
5176  CC = ISD::SETEQ;
5177  else if (CC == ISD::SETUNE)
5178  CC = ISD::SETNE;
5179 
5180  SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5181  SDValue ARMcc;
5182  if (LHS.getValueType() == MVT::f32) {
5183  LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5184  bitcastf32Toi32(LHS, DAG), Mask);
5185  RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5186  bitcastf32Toi32(RHS, DAG), Mask);
5187  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5188  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5189  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5190  Chain, Dest, ARMcc, CCR, Cmp);
5191  }
5192 
5193  SDValue LHS1, LHS2;
5194  SDValue RHS1, RHS2;
5195  expandf64Toi32(LHS, DAG, LHS1, LHS2);
5196  expandf64Toi32(RHS, DAG, RHS1, RHS2);
5197  LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5198  RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5199  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5200  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5201  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5202  SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5203  return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5204  }
5205 
5206  return SDValue();
5207 }
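// For illustration, why the 0x7fffffff mask above is safe: IEEE-754 equality
// must treat +0.0 and -0.0 as equal even though their bit patterns differ in
// the sign bit. Clearing the sign bit before the integer compare makes the
// masked patterns of +0.0 and -0.0 identical, so with one operand known to
// be zero, masked integer equality coincides with the EQ/NE fp result.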
5208 
5209 SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5210  SDValue Chain = Op.getOperand(0);
5211  SDValue Cond = Op.getOperand(1);
5212  SDValue Dest = Op.getOperand(2);
5213  SDLoc dl(Op);
5214 
5215  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5216  // instruction.
5217  unsigned Opc = Cond.getOpcode();
5218  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5219  !Subtarget->isThumb1Only();
5220  if (Cond.getResNo() == 1 &&
5221  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5222  Opc == ISD::USUBO || OptimizeMul)) {
5223  // Only lower legal XALUO ops.
5224  if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5225  return SDValue();
5226 
5227  // The actual operation with overflow check.
5228  SDValue Value, OverflowCmp;
5229  SDValue ARMcc;
5230  std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5231 
5232  // Reverse the condition code.
5233  ARMCC::CondCodes CondCode =
5234  (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5235  CondCode = ARMCC::getOppositeCondition(CondCode);
5236  ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5237  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5238 
5239  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5240  OverflowCmp);
5241  }
5242 
5243  return SDValue();
5244 }
5245 
5246 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5247  SDValue Chain = Op.getOperand(0);
5248  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5249  SDValue LHS = Op.getOperand(2);
5250  SDValue RHS = Op.getOperand(3);
5251  SDValue Dest = Op.getOperand(4);
5252  SDLoc dl(Op);
5253 
5254  if (isUnsupportedFloatingType(LHS.getValueType())) {
5255  DAG.getTargetLoweringInfo().softenSetCCOperands(
5256  DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5257 
5258  // If softenSetCCOperands only returned one value, we should compare it to
5259  // zero.
5260  if (!RHS.getNode()) {
5261  RHS = DAG.getConstant(0, dl, LHS.getValueType());
5262  CC = ISD::SETNE;
5263  }
5264  }
5265 
5266  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5267  // instruction.
5268  unsigned Opc = LHS.getOpcode();
5269  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5270  !Subtarget->isThumb1Only();
5271  if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5272  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5273  Opc == ISD::USUBO || OptimizeMul) &&
5274  (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5275  // Only lower legal XALUO ops.
5276  if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5277  return SDValue();
5278 
5279  // The actual operation with overflow check.
5280  SDValue Value, OverflowCmp;
5281  SDValue ARMcc;
5282  std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5283 
5284  if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5285  // Reverse the condition code.
5286  ARMCC::CondCodes CondCode =
5287  (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5288  CondCode = ARMCC::getOppositeCondition(CondCode);
5289  ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5290  }
5291  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5292 
5293  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5294  OverflowCmp);
5295  }
5296 
5297  if (LHS.getValueType() == MVT::i32) {
5298  SDValue ARMcc;
5299  SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5300  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5301  return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5302  Chain, Dest, ARMcc, CCR, Cmp);
5303  }
5304 
5305  if (getTargetMachine().Options.UnsafeFPMath &&
5306  (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5307  CC == ISD::SETNE || CC == ISD::SETUNE)) {
5308  if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5309  return Result;
5310  }
5311 
5312  ARMCC::CondCodes CondCode, CondCode2;
5313  FPCCToARMCC(CC, CondCode, CondCode2);
5314 
5315  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5316  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5317  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5318  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5319  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5320  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5321  if (CondCode2 != ARMCC::AL) {
5322  ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5323  SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5324  Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5325  }
5326  return Res;
5327 }
5328 
5329 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5330  SDValue Chain = Op.getOperand(0);
5331  SDValue Table = Op.getOperand(1);
5332  SDValue Index = Op.getOperand(2);
5333  SDLoc dl(Op);
5334 
5335  EVT PTy = getPointerTy(DAG.getDataLayout());
5336  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5337  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5338  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5339  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5340  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5341  if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5342  // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table,
5343  // which does another jump to the destination. This also makes it easier
5344  // to translate it to TBB / TBH later (Thumb2 only).
5345  // FIXME: This might not work if the function is extremely large.
5346  return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5347  Addr, Op.getOperand(2), JTI);
5348  }
5349  if (isPositionIndependent() || Subtarget->isROPI()) {
5350  Addr =
5351  DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5352  MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5353  Chain = Addr.getValue(1);
5354  Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5355  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5356  } else {
5357  Addr =
5358  DAG.getLoad(PTy, dl, Chain, Addr,
5359  MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5360  Chain = Addr.getValue(1);
5361  return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5362  }
5363 }
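// For illustration, the addressing math above: each jump-table entry is 4
// bytes, so the entry address is Table + Index * 4. In the
// position-independent case the entry holds an offset relative to the table
// rather than an absolute address, hence the extra Table + loaded-value
// addition before the branch.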
5364 
5365 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5366  EVT VT = Op.getValueType();
5367  SDLoc dl(Op);
5368 
5369  if (Op.getValueType().getVectorElementType() == MVT::i32) {
5370  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5371  return Op;
5372  return DAG.UnrollVectorOp(Op.getNode());
5373  }
5374 
5375  const bool HasFullFP16 =
5376  static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5377 
5378  EVT NewTy;
5379  const EVT OpTy = Op.getOperand(0).getValueType();
5380  if (OpTy == MVT::v4f32)
5381  NewTy = MVT::v4i32;
5382  else if (OpTy == MVT::v4f16 && HasFullFP16)
5383  NewTy = MVT::v4i16;
5384  else if (OpTy == MVT::v8f16 && HasFullFP16)
5385  NewTy = MVT::v8i16;
5386  else
5387  llvm_unreachable("Invalid type for custom lowering!");
5388 
5389  if (VT != MVT::v4i16 && VT != MVT::v8i16)
5390  return DAG.UnrollVectorOp(Op.getNode());
5391 
5392  Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5393  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5394 }
5395 
5396 SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5397  EVT VT = Op.getValueType();
5398  if (VT.isVector())
5399  return LowerVectorFP_TO_INT(Op, DAG);
5400 
5401  bool IsStrict = Op->isStrictFPOpcode();
5402  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5403 
5404  if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5405  RTLIB::Libcall LC;
5406  if (Op.getOpcode() == ISD::FP_TO_SINT ||
5407  Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5408  LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5409  Op.getValueType());
5410  else
5411  LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5412  Op.getValueType());
5413  SDLoc Loc(Op);
5414  MakeLibCallOptions CallOptions;
5415  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5416  SDValue Result;
5417  std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5418  CallOptions, Loc, Chain);
5419  return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5420  }
5421 
5422  // FIXME: Remove this when we have strict fp instruction selection patterns
5423  if (IsStrict) {
5424  SDLoc Loc(Op);
5425  SDValue Result =
5426  DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5427  : ISD::FP_TO_UINT,
5428  Loc, Op.getValueType(), SrcVal);
5429  return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5430  }
5431 
5432  return Op;
5433 }
5434 
5435 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5436  EVT VT = Op.getValueType();
5437  SDLoc dl(Op);
5438 
5439  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5440  if (VT.getVectorElementType() == MVT::f32)
5441  return Op;
5442  return DAG.UnrollVectorOp(Op.getNode());
5443  }
5444 
5445  assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5446  Op.getOperand(0).getValueType() == MVT::v8i16) &&
5447  "Invalid type for custom lowering!");
5448 
5449  const bool HasFullFP16 =
5450  static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5451 
5452  EVT DestVecType;
5453  if (VT == MVT::v4f32)
5454  DestVecType = MVT::v4i32;
5455  else if (VT == MVT::v4f16 && HasFullFP16)
5456  DestVecType = MVT::v4i16;
5457  else if (VT == MVT::v8f16 && HasFullFP16)
5458  DestVecType = MVT::v8i16;
5459  else
5460  return DAG.UnrollVectorOp(Op.getNode());
5461 
5462  unsigned CastOpc;
5463  unsigned Opc;
5464  switch (Op.getOpcode()) {
5465  default: llvm_unreachable("Invalid opcode!");
5466  case ISD::SINT_TO_FP:
5467  CastOpc = ISD::SIGN_EXTEND;
5468  Opc = ISD::SINT_TO_FP;
5469  break;
5470  case ISD::UINT_TO_FP:
5471  CastOpc = ISD::ZERO_EXTEND;
5472  Opc = ISD::UINT_TO_FP;
5473  break;
5474  }
5475 
5476  Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5477  return DAG.getNode(Opc, dl, VT, Op);
5478 }
5479 
5480 SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5481  EVT VT = Op.getValueType();
5482  if (VT.isVector())
5483  return LowerVectorINT_TO_FP(Op, DAG);
5484  if (isUnsupportedFloatingType(VT)) {
5485  RTLIB::Libcall LC;
5486  if (Op.getOpcode() == ISD::SINT_TO_FP)
5487  LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5488  Op.getValueType());
5489  else
5490  LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5491  Op.getValueType());
5492  MakeLibCallOptions CallOptions;
5493  return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5494  CallOptions, SDLoc(Op)).first;
5495  }
5496 
5497  return Op;
5498 }
5499 
5500 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
5501  // Implement fcopysign with a fabs and a conditional fneg.
5502  SDValue Tmp0 = Op.getOperand(0);
5503  SDValue Tmp1 = Op.getOperand(1);
5504  SDLoc dl(Op);
5505  EVT VT = Op.getValueType();
5506  EVT SrcVT = Tmp1.getValueType();
5507  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
5508  Tmp0.getOpcode() == ARMISD::VMOVDRR;
5509  bool UseNEON = !InGPR && Subtarget->hasNEON();
5510 
5511  if (UseNEON) {
5512  // Use VBSL to copy the sign bit.
5513  unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
5514  SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
5515  DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
5516  EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5517  if (VT == MVT::f64)
5518  Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5519  DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
5520  DAG.getConstant(32, dl, MVT::i32));
5521  else /*if (VT == MVT::f32)*/
5522  Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
5523  if (SrcVT == MVT::f32) {
5524  Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
5525  if (VT == MVT::f64)
5526  Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5527  DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
5528  DAG.getConstant(32, dl, MVT::i32));
5529  } else if (VT == MVT::f32)
5530  Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
5531  DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
5532  DAG.getConstant(32, dl, MVT::i32));
5533  Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
5534  Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
5535 
5536  SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
5537  dl, MVT::i32);
5538  AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
5539  SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
5540  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
5541 
5542  SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
5543  DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
5544  DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
5545  if (VT == MVT::f32) {
5546  Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
5547  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
5548  DAG.getConstant(0, dl, MVT::i32));
5549  } else {
5550  Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
5551  }
5552 
5553  return Res;
5554  }
5555 
5556  // Bitcast operand 1 to i32.
5557  if (SrcVT == MVT::f64)
5558  Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5559  Tmp1).getValue(1);
5560  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
5561 
5562  // Or in the signbit with integer operations.
5563  SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
5564  SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5565  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
5566  if (VT == MVT::f32) {
5567  Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
5568  DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
5569  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
5570  DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
5571  }
5572 
5573  // f64: Or the high part with signbit and then combine two parts.
5574  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5575  Tmp0);
5576  SDValue Lo = Tmp0.getValue(0);
5577  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
5578  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
5579  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
5580 }
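// For illustration only: a hypothetical scalar model of the integer path
// above (illustrativeCopySignF32Bits is not part of this file; assumes
// <cstdint> and IEEE-754 single precision).
static inline uint32_t illustrativeCopySignF32Bits(uint32_t X, uint32_t Y) {
  // Keep the magnitude bits of X and take only the sign bit of Y, which is
  // exactly what the AND/AND/OR sequence above computes on bitcast values.
  return (X & 0x7fffffffu) | (Y & 0x80000000u);
}
// The f64 path applies the same masking to the high word only, since the
// sign bit of a double lives in the upper 32 bits.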
5581 
5582 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
5583  MachineFunction &MF = DAG.getMachineFunction();
5584  MachineFrameInfo &MFI = MF.getFrameInfo();
5585  MFI.setReturnAddressIsTaken(true);
5586 
5587  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
5588  return SDValue();
5589 
5590  EVT VT = Op.getValueType();
5591  SDLoc dl(Op);
5592  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5593  if (Depth) {
5594  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5595  SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
5596  return DAG.getLoad(VT, dl, DAG.getEntryNode(),
5597  DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
5598  MachinePointerInfo());
5599  }
5600 
5601  // Return LR, which contains the return address. Mark it an implicit live-in.
5602  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
5603  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
5604 }
5605 
5606 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
5607  const ARMBaseRegisterInfo &ARI =
5608  *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
5609  MachineFunction &MF = DAG.getMachineFunction();
5610  MachineFrameInfo &MFI = MF.getFrameInfo();
5611  MFI.setFrameAddressIsTaken(true);
5612 
5613  EVT VT = Op.getValueType();
5614  SDLoc dl(Op); // FIXME probably not meaningful
5615  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5616  Register FrameReg = ARI.getFrameRegister(MF);
5617  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
5618  while (Depth--)
5619  FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
5620  MachinePointerInfo());
5621  return FrameAddr;
5622 }
5623 
5624 // FIXME? Maybe this could be a TableGen attribute on some registers and
5625 // this table could be generated automatically from RegInfo.
5626 Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
5627  const MachineFunction &MF) const {
5629  .Case("sp", ARM::SP)
5630  .Default(0);
5631  if (Reg)
5632  return Reg;
5633  report_fatal_error(Twine("Invalid register name \""
5634  + StringRef(RegName) + "\"."));
5635 }
5636 
5637 // The result is a 64-bit value, so split it into two 32-bit values and return them as a
5638 // pair of values.
5639 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
5640  SelectionDAG &DAG) {
5641  SDLoc DL(N);
5642 
5643  // This function is only supposed to be called for i64 type destination.
5644  assert(N->getValueType(0) == MVT::i64
5645  && "ExpandREAD_REGISTER called for non-i64 type result.");
5646 
5647  SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
5648  DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
5649  N->getOperand(0),
5650  N->getOperand(1));
5651 
5652  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
5653  Read.getValue(1)));
5654  Results.push_back(Read.getOperand(0));
5655 }
5656 
5657 /// \p BC is a bitcast that is about to be turned into a VMOVDRR.
5658 /// When \p DstVT, the destination type of \p BC, is on the vector
5659 /// register bank and the source of bitcast, \p Op, operates on the same bank,
5660 /// it might be possible to combine them, such that everything stays on the
5661 /// vector register bank.
5662 /// \returns The node that would replace \p BC, if the combine
5663 /// is possible.
5664 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
5665  SelectionDAG &DAG) {
5666  SDValue Op = BC->getOperand(0);
5667  EVT DstVT = BC->getValueType(0);
5668 
5669  // The only vector instruction that can produce a scalar (remember,
5670  // since the bitcast was about to be turned into VMOVDRR, the source
5671  // type is i64) from a vector is EXTRACT_VECTOR_ELT.
5672  // Moreover, we can do this combine only if there is one use.
5673  // Finally, if the destination type is not a vector, there is not
5674  // much point in forcing everything onto the vector bank.
5675  if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5676  !Op.hasOneUse())
5677  return SDValue();
5678 
5679  // If the index is not constant, we will introduce an additional
5680  // multiply that will stick.
5681  // Give up in that case.
5682  ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5683  if (!Index)
5684  return SDValue();
5685  unsigned DstNumElt = DstVT.getVectorNumElements();
5686 
5687  // Compute the new index.
5688  const APInt &APIntIndex = Index->getAPIntValue();
5689  APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
5690  NewIndex *= APIntIndex;
5691  // Check if the new constant index fits into i32.
5692  if (NewIndex.getBitWidth() > 32)
5693  return SDValue();
5694 
5695  // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
5696  // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
5697  SDLoc dl(Op);
5698  SDValue ExtractSrc = Op.getOperand(0);
5699  EVT VecVT = EVT::getVectorVT(
5700  *DAG.getContext(), DstVT.getScalarType(),
5701  ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
5702  SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
5703  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
5704  DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
5705 }
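// For illustration, the index math above on a concrete case: bitcasting
// 'i64 extractelt v2i64 src, 1' to v4i16 gives DstNumElt = 4 and
// NewIndex = 4 * 1, producing
// 'v4i16 extract_subvector (v8i16 bitcast src), 4': the same 64 bits,
// selected without ever leaving the vector register bank.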
5706 
5707 /// ExpandBITCAST - If the target supports VFP, this function is called to
5708 /// expand a bit convert where either the source or destination type is i64 to
5709 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
5710 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
5711 /// vectors), since the legalizer won't know what to do with that.
5712 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
5713  const ARMSubtarget *Subtarget) {
5714  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5715  SDLoc dl(N);
5716  SDValue Op = N->getOperand(0);
5717 
5718  // This function is only supposed to be called for i64 types, either as the
5719  // source or destination of the bit convert.
5720  EVT SrcVT = Op.getValueType();
5721  EVT DstVT = N->getValueType(0);
5722  const bool HasFullFP16 = Subtarget->hasFullFP16();
5723 
5724  if (SrcVT == MVT::f32 && DstVT == MVT::i32) {
5725  // FullFP16: half values are passed in S-registers, and we don't
5726  // need any of the bitcast and moves:
5727  //
5728  // t2: f32,ch = CopyFromReg t0, Register:f32 %0
5729  // t5: i32 = bitcast t2
5730  // t18: f16 = ARMISD::VMOVhr t5
5731  if (Op.getOpcode() != ISD::CopyFromReg ||
5732  Op.getValueType() != MVT::f32)
5733  return SDValue();
5734 
5735  auto Move = N->use_begin();
5736  if (Move->getOpcode() != ARMISD::VMOVhr)
5737  return SDValue();
5738 
5739  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
5740  SDValue Copy = DAG.getNode(ISD::CopyFromReg, SDLoc(Op), MVT::f16, Ops);
5741  DAG.ReplaceAllUsesWith(*Move, &Copy);
5742  return Copy;
5743  }
5744 
5745  if (SrcVT == MVT::i16 && DstVT == MVT::f16) {
5746  if (!HasFullFP16)
5747  return SDValue();
5748  // SoftFP: read half-precision arguments:
5749  //
5750  // t2: i32,ch = ...
5751  // t7: i16 = truncate t2 <~~~~ Op
5752  // t8: f16 = bitcast t7 <~~~~ N
5753  //
5754  if (Op.getOperand(0).getValueType() == MVT::i32)
5755  return DAG.getNode(ARMISD::VMOVhr, SDLoc(Op),
5756  MVT::f16, Op.getOperand(0));
5757 
5758  return SDValue();
5759  }
5760 
5761  // Half-precision return values
5762  if (SrcVT == MVT::f16 && DstVT == MVT::i16) {
5763  if (!HasFullFP16)
5764  return SDValue();
5765  //
5766  // t11: f16 = fadd t8, t10
5767  // t12: i16 = bitcast t11 <~~~ SDNode N
5768  // t13: i32 = zero_extend t12
5769  // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t13
5770  // t17: ch = ARMISD::RET_FLAG t16, Register:i32 %r0, t16:1
5771  //
5772  // transform this into:
5773  //
5774  // t20: i32 = ARMISD::VMOVrh t11
5775  // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t20
5776  //
5777  auto ZeroExtend = N->use_begin();
5778  if (N->use_size() != 1 || ZeroExtend->getOpcode() != ISD::ZERO_EXTEND ||
5779  ZeroExtend->getValueType(0) != MVT::i32)
5780  return SDValue();
5781 
5782  auto Copy = ZeroExtend->use_begin();
5783  if (Copy->getOpcode() == ISD::CopyToReg &&
5784  Copy->use_begin()->getOpcode() == ARMISD::RET_FLAG) {
5785  SDValue Cvt = DAG.getNode(ARMISD::VMOVrh, SDLoc(Op), MVT::i32, Op);
5786  DAG.ReplaceAllUsesWith(*ZeroExtend, &Cvt);
5787  return Cvt;
5788  }
5789  return SDValue();
5790  }
5791 
5792  if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
5793  return SDValue();
5794 
5795  // Turn i64->f64 into VMOVDRR.
5796  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
5797  // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
5798  // if we can combine the bitcast with its source.
5799  if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
5800  return Val;
5801 
5802  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5803  DAG.getConstant(0, dl, MVT::i32));
5804  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5805  DAG.getConstant(1, dl, MVT::i32));
5806  return DAG.getNode(ISD::BITCAST, dl, DstVT,
5807  DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
5808  }
5809 
5810  // Turn f64->i64 into VMOVRRD.
5811  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
5812  SDValue Cvt;
5813  if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
5814  SrcVT.getVectorNumElements() > 1)
5815  Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5816  DAG.getVTList(MVT::i32, MVT::i32),
5817  DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
5818  else
5819  Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5820  DAG.getVTList(MVT::i32, MVT::i32), Op);
5821  // Merge the pieces into a single i64 value.
5822  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
5823  }
5824 
5825  return SDValue();
5826 }
5827 
5828 /// getZeroVector - Returns a vector of specified type with all zero elements.
5829 /// Zero vectors are used to represent vector negation and in those cases
5830 /// will be implemented with the NEON VNEG instruction. However, VNEG does
5831 /// not support i64 elements, so sometimes the zero vectors will need to be
5832 /// explicitly constructed. Regardless, use a canonical VMOV to create the
5833 /// zero vector.
5834 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5835  assert(VT.isVector() && "Expected a vector type");
5836  // The canonical modified immediate encoding of a zero vector is....0!
5837  SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
5838  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
5839  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
5840  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
5841 }
5842 
5843 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
5844 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
5845 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
5846  SelectionDAG &DAG) const {
5847  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5848  EVT VT = Op.getValueType();
5849  unsigned VTBits = VT.getSizeInBits();
5850  SDLoc dl(Op);
5851  SDValue ShOpLo = Op.getOperand(0);
5852  SDValue ShOpHi = Op.getOperand(1);
5853  SDValue ShAmt = Op.getOperand(2);
5854  SDValue ARMcc;
5855  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5856  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5857 
5858  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5859 
5860  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5861  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5862  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5863  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5864  DAG.getConstant(VTBits, dl, MVT::i32));
5865  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5866  SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5867  SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5868  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5869  ISD::SETGE, ARMcc, DAG, dl);
5870  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
5871  ARMcc, CCR, CmpLo);
5872 
5873  SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5874  SDValue HiBigShift = Opc == ISD::SRA
5875  ? DAG.getNode(Opc, dl, VT, ShOpHi,
5876  DAG.getConstant(VTBits - 1, dl, VT))
5877  : DAG.getConstant(0, dl, VT);
5878  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5879  ISD::SETGE, ARMcc, DAG, dl);
5880  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5881  ARMcc, CCR, CmpHi);
5882 
5883  SDValue Ops[2] = { Lo, Hi };
5884  return DAG.getMergeValues(Ops, dl);
5885 }
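// For illustration, a scalar model of the CMOV selection above for SRL_PARTS
// (a sketch, ignoring the Amt == 0 edge case; assumes uint32_t halves and
// 0 < Amt < 64):
//
//   Lo = (Amt < 32) ? (Lo >> Amt) | (Hi << (32 - Amt)) // "small" shift
//                   : Hi >> (Amt - 32);                // "big" shift
//   Hi = (Amt < 32) ? (Hi >> Amt) : 0;
//
// The compare of ExtraShAmt = Amt - 32 against 0 with SETGE selects the
// big-shift value exactly when Amt >= 32; SRA_PARTS differs only in using
// arithmetic shifts and a sign-replicated value for the big-shift Hi.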
5886 
5887 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5888 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
5889 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
5890  SelectionDAG &DAG) const {
5891  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5892  EVT VT = Op.getValueType();
5893  unsigned VTBits = VT.getSizeInBits();
5894  SDLoc dl(Op);
5895  SDValue ShOpLo = Op.getOperand(0);
5896  SDValue ShOpHi = Op.getOperand(1);
5897  SDValue ShAmt = Op.getOperand(2);
5898  SDValue ARMcc;
5899  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5900 
5901  assert(Op.getOpcode() == ISD::SHL_PARTS);
5902  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5903  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5904  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5905  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5906  SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5907 
5908  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5909  DAG.getConstant(VTBits, dl, MVT::i32));
5910  SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5911  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5912  ISD::SETGE, ARMcc, DAG, dl);
5913  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5914  ARMcc, CCR, CmpHi);
5915 
5916  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5917  ISD::SETGE, ARMcc, DAG, dl);
5918  SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5919  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
5920  DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
5921 
5922  SDValue Ops[2] = { Lo, Hi };
5923  return DAG.getMergeValues(Ops, dl);
5924 }
5925 
5926 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
5927  SelectionDAG &DAG) const {
5928  // The rounding mode is in bits 23:22 of the FPSCR.
5929  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
5930  // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
5931  // so that the shift + and get folded into a bitfield extract.
5932  SDLoc dl(Op);
5933  SDValue Ops[] = { DAG.getEntryNode(),
5934  DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
5935 
5936  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
5937  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
5938  DAG.getConstant(1U << 22, dl, MVT::i32));
5939  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
5940  DAG.getConstant(22, dl, MVT::i32));
5941  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
5942  DAG.getConstant(3, dl, MVT::i32));
5943 }
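// For illustration, the mapping worked through: adding 1 << 22 increments
// the two-bit field at bits 23:22, so an FPSCR rounding mode of 0 (round to
// nearest) becomes (0 + 1) & 3 = 1, 1 (toward +infinity) becomes 2, 2
// (toward -infinity) becomes 3, and 3 (toward zero) wraps to 0, exactly the
// 0->1, 1->2, 2->3, 3->0 table quoted above.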
5944 
5945 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
5946  const ARMSubtarget *ST) {
5947  SDLoc dl(N);
5948  EVT VT = N->getValueType(0);
5949  if (VT.isVector() && ST->hasNEON()) {
5950 
5951  // Compute the least significant set bit: LSB = X & -X
5952  SDValue X = N->getOperand(0);
5953  SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
5954  SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
5955 
5956  EVT ElemTy = VT.getVectorElementType();
5957 
5958  if (ElemTy == MVT::i8) {
5959  // Compute with: cttz(x) = ctpop(lsb - 1)
5960  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5961  DAG.getTargetConstant(1, dl, ElemTy));
5962  SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5963  return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5964  }
5965 
5966  if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
5967  (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
5968  // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
5969  unsigned NumBits = ElemTy.getSizeInBits();
5970  SDValue WidthMinus1 =
5971  DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5972  DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5973  SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5974  return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5975  }
5976 
5977  // Compute with: cttz(x) = ctpop(lsb - 1)
5978 
5979  // Compute LSB - 1.
5980  SDValue Bits;
5981  if (ElemTy == MVT::i64) {
5982  // Load constant 0xffff'ffff'ffff'ffff to register.
5983  SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5984  DAG.getTargetConstant(0x1eff, dl, MVT::i32));
5985  Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5986  } else {
5987  SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5988  DAG.getTargetConstant(1, dl, ElemTy));
5989  Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5990  }
5991  return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5992  }
5993 
5994  if (!ST->hasV6T2Ops())
5995  return SDValue();
5996 
5997  SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5998  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5999 }
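// For illustration, the identities above worked on x = 0b110100: the least
// significant set bit is LSB = x & -x = 0b100, so cttz(x) = ctpop(LSB - 1)
// = ctpop(0b011) = 2, and equivalently (width - 1) - ctlz(LSB) = 31 - 29 = 2
// for 32-bit elements. The i64 path forms LSB - 1 as LSB + 0xff...ff because
// the VMOV modified immediate can encode an all-ones i64 splat but not a
// splat of the value 1.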
6000 
6001 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6002  const ARMSubtarget *ST) {
6003  EVT VT = N->getValueType(0);
6004  SDLoc DL(N);
6005 
6006  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6007  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6008  VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6009  "Unexpected type for custom ctpop lowering");
6010 
6011  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6012  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6013  SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6014  Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6015 
6016  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6017  unsigned EltSize = 8;
6018  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6019  while (EltSize != VT.getScalarSizeInBits()) {
6020  SmallVector<SDValue, 8> Ops;
6021  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6022  TLI.getPointerTy(DAG.getDataLayout())));
6023  Ops.push_back(Res);
6024 
6025  EltSize *= 2;
6026  NumElts /= 2;
6027  MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6028  Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6029  }
6030 
6031  return Res;
6032 }
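// For illustration, the widening ladder above for a v4i32 input: CTPOP
// produces v16i8 per-byte counts, vpaddlu folds them into v8i16 pair sums,
// then into v4i32, at which point EltSize == 32 matches the scalar size and
// the loop stops. Each step halves NumElts while doubling EltSize, so the
// running sums always fit their widened elements exactly.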
6033 
6034 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6035 /// operand of a vector shift operation, where all the elements of the
6036 /// build_vector must have the same constant integer value.
6037 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6038  // Ignore bit_converts.
6039  while (Op.getOpcode() == ISD::BITCAST)
6040  Op = Op.getOperand(0);
6041  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6042  APInt SplatBits, SplatUndef;
6043  unsigned SplatBitSize;
6044  bool HasAnyUndefs;
6045  if (!BVN ||
6046  !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6047  ElementBits) ||
6048  SplatBitSize > ElementBits)
6049  return false;
6050  Cnt = SplatBits.getSExtValue();
6051  return true;
6052 }
6053 
6054 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
6055 /// operand of a vector shift left operation. That value must be in the range:
6056 /// 0 <= Value < ElementBits for a left shift; or
6057 /// 0 <= Value <= ElementBits for a long left shift.
6058 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6059  assert(VT.isVector() && "vector shift count is not a vector type");
6060  int64_t ElementBits = VT.getScalarSizeInBits();
6061  if (!getVShiftImm(Op, ElementBits, Cnt))
6062  return false;
6063  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6064 }
6065 
6066 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
6067 /// operand of a vector shift right operation. For a shift opcode, the value
6068 /// is positive, but for an intrinsic the count must be negative. The
6069 /// absolute value must be in the range:
6070 /// 1 <= |Value| <= ElementBits for a right shift; or
6071 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6072 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6073  int64_t &Cnt) {
6074  assert(VT.isVector() && "vector shift count is not a vector type");
6075  int64_t ElementBits = VT.getScalarSizeInBits();
6076  if (!getVShiftImm(Op, ElementBits, Cnt))
6077  return false;
6078  if (!isIntrinsic)
6079  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6080  if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6081  Cnt = -Cnt;
6082  return true;
6083  }
6084  return false;
6085 }
6086 
6087 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6088  const ARMSubtarget *ST) {
6089  EVT VT = N->getValueType(0);
6090  SDLoc dl(N);
6091  int64_t Cnt;
6092 
6093  if (!VT.isVector())
6094  return SDValue();
6095 
6096  // We essentially have two forms here. Shift by an immediate and shift by a
6097  // vector register (there are also shifts by a GPR, but those are just handled
6098  // with a tablegen pattern). We cannot easily match shift by an immediate in
6099  // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6100  // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6101  // signed or unsigned, and a negative shift indicates a shift right).
6102  if (N->getOpcode() == ISD::SHL) {
6103  if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6104  return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6105  DAG.getConstant(Cnt, dl, MVT::i32));
6106  return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6107  N->getOperand(1));
6108  }
6109 
6110  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6111  "unexpected vector shift opcode");
6112 
6113  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6114  unsigned VShiftOpc =
6115  (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6116  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6117  DAG.getConstant(Cnt, dl, MVT::i32));
6118  }
6119 
6120  // Other right shifts we don't have operations for (we use a shift left by a
6121  // negative number).
6122  EVT ShiftVT = N->getOperand(1).getValueType();
6123  SDValue NegatedCount = DAG.getNode(
6124  ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6125  unsigned VShiftOpc =
6126  (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6127  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6128 }
6129 
6130 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6131  const ARMSubtarget *ST) {
6132  EVT VT = N->getValueType(0);
6133  SDLoc dl(N);
6134 
6135  // We can get here for a node like i32 = ISD::SHL i32, i64
6136  if (VT != MVT::i64)
6137  return SDValue();
6138 
6139  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6140  N->getOpcode() == ISD::SHL) &&
6141  "Unknown shift to lower!");
6142 
6143  unsigned ShOpc = N->getOpcode();
6144  if (ST->hasMVEIntegerOps()) {
6145  SDValue ShAmt = N->getOperand(1);
6146  unsigned ShPartsOpc = ARMISD::LSLL;
6147  ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6148 
6149  // If the shift amount is zero, is 32 or more, or its type is wider than 64
6150  // bits, fall back to the default expansion
6151  if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
6152  (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
6153  return SDValue();
6154 
6155  // Extract the lower 32 bits of the shift amount if it's not an i32
6156  if (ShAmt->getValueType(0) != MVT::i32)
6157  ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6158 
6159  if (ShOpc == ISD::SRL) {
6160  if (!Con)
6161  // There is no t2LSRLr instruction so negate and perform an lsll if the
6162  // shift amount is in a register, emulating a right shift.
6163  ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6164  DAG.getConstant(0, dl, MVT::i32), ShAmt);
6165  else
6166  // Else generate an lsrl on the immediate shift amount
6167  ShPartsOpc = ARMISD::LSRL;
6168  } else if (ShOpc == ISD::SRA)
6169  ShPartsOpc = ARMISD::ASRL;
6170 
6171  // Lower 32 bits of the destination/source
6172  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6173  DAG.getConstant(0, dl, MVT::i32));
6174  // Upper 32 bits of the destination/source
6175  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6176  DAG.getConstant(1, dl, MVT::i32));
6177 
6178  // Generate the shift operation as computed above
6179  Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6180  ShAmt);
6181  // The upper 32 bits come from the second return value of lsll
6182  Hi = SDValue(Lo.getNode(), 1);
6183  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6184  }
6185 
6186  // We only lower SRA, SRL of 1 here, all others use generic lowering.
6187  if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6188  return SDValue();
6189 
6190  // If we are in thumb mode, we don't have RRX.
6191  if (ST->isThumb1Only())
6192  return SDValue();
6193 
6194  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6195  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6196  DAG.getConstant(0, dl, MVT::i32));
6197  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6198  DAG.getConstant(1, dl, MVT::i32));
6199 
6200  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
6201  // captures the result into a carry flag.
6202  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
6203  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6204 
6205  // The low part is an ARMISD::RRX operand, which shifts the carry in.
6206  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6207 
6208  // Merge the pieces into a single i64 value.
6209  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6210 }
6211 
6212 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6213  const ARMSubtarget *ST) {
6214  bool Invert = false;
6215  bool Swap = false;
6216  unsigned Opc = ARMCC::AL;
6217 
6218  SDValue Op0 = Op.getOperand(0);
6219  SDValue Op1 = Op.getOperand(1);
6220  SDValue CC = Op.getOperand(2);
6221  EVT VT = Op.getValueType();
6222  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6223  SDLoc dl(Op);
6224 
6225  EVT CmpVT;
6226  if (ST->hasNEON())
6227  CmpVT = VT.changeVectorElementTypeToInteger();
6228  else {
6229  assert(ST->hasMVEIntegerOps() &&
6230  "No hardware support for integer vector comparison!");
6231 
6232  if (Op.getValueType().getVectorElementType() != MVT::i1)
6233  return SDValue();
6234 
6235  // Make sure we expand floating point setcc to scalar if we do not have
6236  // mve.fp, so that we can handle them from there.
6237  if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6238  return SDValue();
6239 
6240  CmpVT = VT;
6241  }
6242 
6243  if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6244  (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6245  // Special-case integer 64-bit equality comparisons. They aren't legal,
6246  // but they can be lowered with a few vector instructions.
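 // For example, a v2i64 SETEQ is bitcast to v4i32 and compared lane-wise;
 // VREV64 then swaps each pair of i32 results so that ANDing the two vectors
 // leaves a lane all-ones only if both halves of the original i64 matched.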
6247  unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6248  EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6249  SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6250  SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6251  SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6252  DAG.getCondCode(ISD::SETEQ));
6253  SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6254  SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6255  Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6256  if (SetCCOpcode == ISD::SETNE)
6257  Merged = DAG.getNOT(dl, Merged, CmpVT);
6258  Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6259  return Merged;
6260  }
6261 
6262  if (CmpVT.getVectorElementType() == MVT::i64)
6263  // 64-bit comparisons are not legal in general.
6264  return SDValue();
6265 
6266  if (Op1.getValueType().isFloatingPoint()) {
6267  switch (SetCCOpcode) {
6268  default: llvm_unreachable("Illegal FP comparison");
6269  case ISD::SETUNE:
6270  case ISD::SETNE:
6271  if (ST->hasMVEFloatOps()) {
6272  Opc = ARMCC::NE; break;
6273  } else {
6274  Invert = true; LLVM_FALLTHROUGH;
6275  }
6276  case ISD::SETOEQ:
6277  case ISD::SETEQ: Opc = ARMCC::EQ; break;
6278  case ISD::SETOLT:
6279  case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
6280  case ISD::SETOGT:
6281  case ISD::SETGT: Opc = ARMCC::GT; break;
6282  case ISD::SETOLE:
6283  case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
6284  case ISD::SETOGE:
6285  case ISD::SETGE: Opc = ARMCC::GE; break;
6286  case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
6287  case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6288  case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
6289  case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6290  case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
6291  case ISD::SETONE: {
6292  // Expand this to (OLT | OGT).
6293  SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6294  DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6295  SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6296  DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6297  SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6298  if (Invert)
6299  Result = DAG.getNOT(dl, Result, VT);
6300  return Result;
6301  }
6302  case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH;
6303  case ISD::SETO: {
6304  // Expand this to (OLT | OGE).
6305  SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6306  DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6307  SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6308  DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6309  SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6310  if (Invert)
6311  Result = DAG.getNOT(dl, Result, VT);
6312  return Result;
6313  }
6314  }
6315  } else {
6316  // Integer comparisons.
6317  switch (SetCCOpcode) {
6318  default: llvm_unreachable("Illegal integer comparison");
6319  case ISD::SETNE:
6320  if (ST->hasMVEIntegerOps()) {
6321  Opc = ARMCC::NE; break;
6322  } else {
6323  Invert = true; LLVM_FALLTHROUGH;
6324  }
6325  case ISD::SETEQ: Opc = ARMCC::EQ; break;
6326  case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
6327  case ISD::SETGT: Opc = ARMCC::GT; break;
6328  case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
6329  case ISD::SETGE: Opc = ARMCC::GE; break;
6330  case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
6331  case ISD::SETUGT: Opc = ARMCC::HI; break;
6332  case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
6333  case ISD::SETUGE: Opc = ARMCC::HS; break;
6334  }
6335 
6336  // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6337  if (ST->hasNEON() && Opc == ARMCC::EQ) {
6338  SDValue AndOp;
6339  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6340  AndOp = Op0;
6341  else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6342  AndOp = Op1;
6343 
6344  // Ignore bitconvert.
6345  if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6346  AndOp = AndOp.getOperand(0);
6347 
6348  if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6349  Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6350  Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6351  SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6352  if (!Invert)
6353  Result = DAG.getNOT(dl, Result, VT);
6354  return Result;
6355  }
6356  }
6357  }
6358 
6359  if (Swap)
6360  std::swap(Op0, Op1);
6361 
6362  // If one of the operands is a constant vector zero, attempt to fold the
6363  // comparison to a specialized compare-against-zero form.
6364  SDValue SingleOp;
6365  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6366  SingleOp = Op0;
6367  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6368  if (Opc == ARMCC::GE)
6369  Opc = ARMCC::LE;
6370  else if (Opc == ARMCC::GT)
6371  Opc = ARMCC::LT;
6372  SingleOp = Op1;
6373  }
6374 
6375  SDValue Result;
6376  if (SingleOp.getNode()) {
6377  Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp,
6378  DAG.getConstant(Opc, dl, MVT::i32));
6379  } else {
6380  Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6381  DAG.getConstant(Opc, dl, MVT::i32));
6382  }
6383 
6384  Result = DAG.getSExtOrTrunc(Result, dl, VT);
6385 
6386  if (Invert)
6387  Result = DAG.getNOT(dl, Result, VT);
6388 
6389  return Result;
6390 }
6391 
6392 static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6393  SDValue LHS = Op.getOperand(0);
6394  SDValue RHS = Op.getOperand(1);
6395  SDValue Carry = Op.getOperand(2);
6396  SDValue Cond = Op.getOperand(3);
6397  SDLoc DL(Op);
6398 
6399  assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6400 
6401  // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
6402  // have to invert the carry first.
6403  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6404  DAG.getConstant(1, DL, MVT::i32), Carry);
6405  // This converts the boolean value carry into the carry flag.
6406  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6407 
6408  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6409  SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6410 
6411  SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6412  SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6413  SDValue ARMcc = DAG.getConstant(
6414  IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6415  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6416  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6417  Cmp.getValue(1), SDValue());
6418  return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6419  CCR, Chain.getValue(1));
6420 }
6421 
6422 /// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6423 /// valid vector constant for a NEON or MVE instruction with a "modified
6424 /// immediate" operand (e.g., VMOV). If so, return the encoded value.
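/// For example, a v4i32 splat of 0x0000ab00 takes the Cmode=001x branch below
/// and is encoded with OpCmode = 0x2 and Imm = 0xab, i.e. a single
/// "VMOV.I32 Dd, #0x0000ab00".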
6425 static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6426  unsigned SplatBitSize, SelectionDAG &DAG,
6427  const SDLoc &dl, EVT &VT, bool is128Bits,
6428  VMOVModImmType type) {
6429  unsigned OpCmode, Imm;
6430 
6431  // SplatBitSize is set to the smallest size that splats the vector, so a
6432  // zero vector will always have SplatBitSize == 8. However, NEON modified
6433  // immediate instructions other than VMOV do not support the 8-bit encoding
6434  // of a zero vector, and the default encoding of zero is supposed to be the
6435  // 32-bit version.
6436  if (SplatBits == 0)
6437  SplatBitSize = 32;
6438 
6439  switch (SplatBitSize) {
6440  case 8:
6441  if (type != VMOVModImm)
6442  return SDValue();
6443  // Any 1-byte value is OK. Op=0, Cmode=1110.
6444  assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6445  OpCmode = 0xe;
6446  Imm = SplatBits;
6447  VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6448  break;
6449 
6450  case 16:
6451  // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6452  VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6453  if ((SplatBits & ~0xff) == 0) {
6454  // Value = 0x00nn: Op=x, Cmode=100x.
6455  OpCmode = 0x8;
6456  Imm = SplatBits;
6457  break;
6458  }
6459  if ((SplatBits & ~0xff00) == 0) {
6460  // Value = 0xnn00: Op=x, Cmode=101x.
6461  OpCmode = 0xa;
6462  Imm = SplatBits >> 8;
6463  break;
6464  }
6465  return SDValue();
6466 
6467  case 32:
6468  // NEON's 32-bit VMOV supports splat values where:
6469  // * only one byte is nonzero, or
6470  // * the least significant byte is 0xff and the second byte is nonzero, or
6471  // * the least significant 2 bytes are 0xff and the third is nonzero.
6472  VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6473  if ((SplatBits & ~0xff) == 0) {
6474  // Value = 0x000000nn: Op=x, Cmode=000x.
6475  OpCmode = 0;
6476  Imm = SplatBits;
6477  break;
6478  }
6479  if ((SplatBits & ~0xff00) == 0) {
6480  // Value = 0x0000nn00: Op=x, Cmode=001x.
6481  OpCmode = 0x2;
6482  Imm = SplatBits >> 8;
6483  break;
6484  }
6485  if ((SplatBits & ~0xff0000) == 0) {
6486  // Value = 0x00nn0000: Op=x, Cmode=010x.
6487  OpCmode = 0x4;
6488  Imm = SplatBits >> 16;
6489  break;
6490  }
6491  if ((SplatBits & ~0xff000000) == 0) {
6492  // Value = 0xnn000000: Op=x, Cmode=011x.
6493  OpCmode = 0x6;
6494  Imm = SplatBits >> 24;
6495  break;
6496  }
6497 
6498  // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
6499  if (type == OtherModImm) return SDValue();
6500 
6501  if ((SplatBits & ~0xffff) == 0 &&
6502  ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6503  // Value = 0x0000nnff: Op=x, Cmode=1100.
6504  OpCmode = 0xc;
6505  Imm = SplatBits >> 8;
6506  break;
6507  }
6508 
6509  // cmode == 0b1101 is not supported for MVE VMVN
6510  if (type == MVEVMVNModImm)
6511  return SDValue();
6512 
6513  if ((SplatBits & ~0xffffff) == 0 &&
6514  ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
6515  // Value = 0x00nnffff: Op=x, Cmode=1101.
6516  OpCmode = 0xd;
6517  Imm = SplatBits >> 16;
6518  break;
6519  }
6520 
6521  // Note: there are a few 32-bit splat values (specifically: 00ffff00,
6522  // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
6523  // VMOV.I32. A (very) minor optimization would be to replicate the value
6524  // and fall through here to test for a valid 64-bit splat. But, then the
6525  // caller would also need to check and handle the change in size.
6526  return SDValue();
6527 
6528  case 64: {
6529  if (type != VMOVModImm)
6530  return SDValue();
6531  // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
6532  uint64_t BitMask = 0xff;
6533  uint64_t Val = 0;
6534  unsigned ImmMask = 1;
6535  Imm = 0;
6536  for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
6537  if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
6538  Val |= BitMask;
6539  Imm |= ImmMask;
6540  } else if ((SplatBits & BitMask) != 0) {
6541  return SDValue();
6542  }
6543  BitMask <<= 8;
6544  ImmMask <<= 1;
6545  }
6546 
6547  if (DAG.getDataLayout().isBigEndian())
6549  // swap the higher and lower 32-bit words
6549  Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
6550 
6551  // Op=1, Cmode=1110.
6552  OpCmode = 0x1e;
6553  VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
6554  break;
6555  }
6556 
6557  default:
6558  llvm_unreachable("unexpected size for isVMOVModifiedImm");
6559  }
6560 
6561  unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
6562  return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
6563 }
6564 
6565 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
6566  const ARMSubtarget *ST) const {
6567  EVT VT = Op.getValueType();
6568  bool IsDouble = (VT == MVT::f64);
6569  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
6570  const APFloat &FPVal = CFP->getValueAPF();
6571 
6572  // Prevent floating-point constants from using literal loads
6573  // when execute-only is enabled.
6574  if (ST->genExecuteOnly()) {
6575  // If we can represent the constant as an immediate, don't lower it
6576  if (isFPImmLegal(FPVal, VT))
6577  return Op;
6578  // Otherwise, construct as integer, and move to float register
6579  APInt INTVal = FPVal.bitcastToAPInt();
6580  SDLoc DL(CFP);
6581  switch (VT.getSimpleVT().SimpleTy) {
6582  default:
6583  llvm_unreachable("Unknown floating point type!");
6584  break;
6585  case MVT::f64: {
6586  SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
6587  SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
6588  if (!ST->isLittle())
6589  std::swap(Lo, Hi);
6590  return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
6591  }
6592  case MVT::f32:
6593  return DAG.getNode(ARMISD::VMOVSR, DL, VT,
6594  DAG.getConstant(INTVal, DL, MVT::i32));
6595  }
6596  }
6597 
6598  if (!ST->hasVFP3Base())
6599  return SDValue();
6600 
6601  // Use the default (constant pool) lowering for double constants when we have
6602  // an SP-only FPU
6603  if (IsDouble && !Subtarget->hasFP64())
6604  return SDValue();
6605 
6606  // Try splatting with a VMOV.f32...
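 // (The VFP/NEON 8-bit FP immediate encodes values of the form
 // +/-(16..31)/16 * 2^(-3..4), e.g. 1.0, 0.5 or 31.0; getFP32Imm and
 // getFP64Imm return -1 for anything unencodable.)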
6607  int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
6608 
6609  if (ImmVal != -1) {
6610  if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
6611  // We have code in place to select a valid ConstantFP already, no need to
6612  // do any mangling.
6613  return Op;
6614  }
6615 
6616  // It's a float and we are trying to use NEON operations where
6617  // possible. Lower it to a splat followed by an extract.
6618  SDLoc DL(Op);
6619  SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
6620  SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
6621  NewVal);
6622  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
6623  DAG.getConstant(0, DL, MVT::i32));
6624  }
6625 
6626  // The rest of our options are NEON only, make sure that's allowed before
6627  // proceeding.
6628  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
6629  return SDValue();
6630 
6631  EVT VMovVT;
6632  uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
6633 
6634  // It wouldn't really be worth bothering for doubles except for one very
6635  // important value, which does happen to match: 0.0. So make sure we don't do
6636  // anything stupid.
6637  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
6638  return SDValue();
6639 
6640  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
6641  SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
6642  VMovVT, false, VMOVModImm);
6643  if (NewVal != SDValue()) {
6644  SDLoc DL(Op);
6645  SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
6646  NewVal);
6647  if (IsDouble)
6648  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
6649 
6650  // It's a float: cast and extract a vector element.
6651  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
6652  VecConstant);
6653  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
6654  DAG.getConstant(0, DL, MVT::i32));
6655  }
6656 
6657  // Finally, try a VMVN.i32
6658  NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
6659  false, VMVNModImm);
6660  if (NewVal != SDValue()) {
6661  SDLoc DL(Op);
6662  SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
6663 
6664  if (IsDouble)
6665  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
6666 
6667  // It's a float: cast and extract a vector element.
6668  SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
6669  VecConstant);
6670  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
6671  DAG.getConstant(0, DL, MVT::i32));
6672  }
6673 
6674  return SDValue();
6675 }
6676 
6677 // Check if a VEXT instruction can handle the shuffle mask when the
6678 // vector sources of the shuffle are the same.
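// For example, for v8i8 the mask <2, 3, 4, 5, 6, 7, 0, 1> is accepted with
// Imm = 2: the indices step up from M[0] and wrap around at NumElts.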
6679 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
6680  unsigned NumElts = VT.getVectorNumElements();
6681 
6682  // Assume that the first shuffle index is not UNDEF. Fail if it is.
6683  if (M[0] < 0)
6684  return false;
6685 
6686  Imm = M[0];
6687 
6688  // If this is a VEXT shuffle, the immediate value is the index of the first
6689  // element. The other shuffle indices must be the successive elements after
6690  // the first one.
6691  unsigned ExpectedElt = Imm;
6692  for (unsigned i = 1; i < NumElts; ++i) {
6693  // Increment the expected index. If it wraps around, just follow it
6694  // back to index zero and keep going.
6695  ++ExpectedElt;
6696  if (ExpectedElt == NumElts)
6697  ExpectedElt = 0;
6698 
6699  if (M[i] < 0) continue; // ignore UNDEF indices
6700  if (ExpectedElt != static_cast<unsigned>(M[i]))
6701  return false;
6702  }
6703 
6704  return true;
6705 }
6706 
6707 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
6708  bool &ReverseVEXT, unsigned &Imm) {
6709  unsigned NumElts = VT.getVectorNumElements();
6710  ReverseVEXT = false;
6711 
6712  // Assume that the first shuffle index is not UNDEF. Fail if it is.
6713  if (M[0] < 0)
6714  return false;
6715 
6716  Imm = M[0];
6717 
6718  // If this is a VEXT shuffle, the immediate value is the index of the first
6719  // element. The other shuffle indices must be the successive elements after
6720  // the first one.
6721  unsigned ExpectedElt = Imm;
6722  for (unsigned i = 1; i < NumElts; ++i) {
6723  // Increment the expected index. If it wraps around, it may still be
6724  // a VEXT but the source vectors must be swapped.
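 // (e.g. for v4i32 the mask <7, 0, 1, 2> wraps at 2 * NumElts, so
 // ReverseVEXT is set and Imm is later adjusted to 7 - 4 = 3 for the
 // swapped operands.)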
6725  ExpectedElt += 1;
6726  if (ExpectedElt == NumElts * 2) {
6727  ExpectedElt = 0;
6728  ReverseVEXT = true;
6729  }
6730 
6731  if (M[i] < 0) continue; // ignore UNDEF indices
6732  if (ExpectedElt != static_cast<unsigned>(M[i]))
6733  return false;
6734  }
6735 
6736  // Adjust the index value if the source operands will be swapped.
6737  if (ReverseVEXT)
6738  Imm -= NumElts;
6739 
6740  return true;
6741 }
6742 
6743 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
6744 /// instruction with the specified blocksize. (The order of the elements
6745 /// within each block of the vector is reversed.)
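/// For example, for v4i16 a VREV64 mask is <3, 2, 1, 0> and a VREV32 mask is
/// <1, 0, 3, 2>.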
6746 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
6747  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
6748  "Only possible block sizes for VREV are: 16, 32, 64");
6749 
6750  unsigned EltSz = VT.getScalarSizeInBits();
6751  if (EltSz == 64)
6752  return false;
6753 
6754  unsigned NumElts = VT.getVectorNumElements();
6755  unsigned BlockElts = M[0] + 1;
6756  // If the first shuffle index is UNDEF, be optimistic.
6757  if (M[0] < 0)
6758  BlockElts = BlockSize / EltSz;
6759 
6760  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
6761  return false;
6762 
6763  for (unsigned i = 0; i < NumElts; ++i) {
6764  if (M[i] < 0) continue; // ignore UNDEF indices
6765  if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
6766  return false;
6767  }
6768 
6769  return true;
6770 }
6771 
6772 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
6773  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
6774  // range, then 0 is placed into the resulting vector. So pretty much any mask
6775  // of 8 elements can work here.
6776  return VT == MVT::v8i8 && M.size() == 8;
6777 }
6778 
6779 static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
6780  unsigned Index) {
6781  if (Mask.size() == Elements * 2)
6782  return Index / Elements;
6783  return Mask[Index] == 0 ? 0 : 1;
6784 }
6785 
6786 // Checks whether the shuffle mask represents a vector transpose (VTRN) by
6787 // checking that pairs of elements in the shuffle mask represent the same index
6788 // in each vector, incrementing the expected index by 2 at each step.
6789 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
6790 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
6791 // v2={e,f,g,h}
6792 // WhichResult gives the offset for each element in the mask based on which
6793 // of the two results it belongs to.
6794 //
6795 // The transpose can be represented either as:
6796 // result1 = shufflevector v1, v2, result1_shuffle_mask
6797 // result2 = shufflevector v1, v2, result2_shuffle_mask
6798 // where v1/v2 and the shuffle masks have the same number of elements
6799 // (here WhichResult (see below) indicates which result is being checked)
6800 //
6801 // or as:
6802 // results = shufflevector v1, v2, shuffle_mask
6803 // where both results are returned in one vector and the shuffle mask has twice
6804 // as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
6805 // want to check the low half and high half of the shuffle mask as if it were
6806 // the other case
6807 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6808  unsigned EltSz = VT.getScalarSizeInBits();
6809  if (EltSz == 64)
6810  return false;
6811 
6812  unsigned NumElts = VT.getVectorNumElements();
6813  if (M.size() != NumElts && M.size() != NumElts*2)
6814  return false;
6815 
6816  // If the mask is twice as long as the input vector then we need to check the
6817  // upper and lower parts of the mask with a matching value for WhichResult
6818  // FIXME: A mask with only even values will be rejected in case the first
6819  // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
6820  // M[0] is used to determine WhichResult
6821  for (unsigned i = 0; i < M.size(); i += NumElts) {
6822  WhichResult = SelectPairHalf(NumElts, M, i);
6823  for (unsigned j = 0; j < NumElts; j += 2) {
6824  if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
6825  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
6826  return false;
6827  }
6828  }
6829 
6830  if (M.size() == NumElts*2)
6831  WhichResult = 0;
6832 
6833  return true;
6834 }
6835 
6836 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
6837 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6838 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
6839 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6840  unsigned EltSz = VT.getScalarSizeInBits();
6841  if (EltSz == 64)
6842  return false;
6843 
6844  unsigned NumElts = VT.getVectorNumElements();
6845  if (M.size() != NumElts && M.size() != NumElts*2)
6846  return false;
6847 
6848  for (unsigned i = 0; i < M.size(); i += NumElts) {
6849  WhichResult = SelectPairHalf(NumElts, M, i);
6850  for (unsigned j = 0; j < NumElts; j += 2) {
6851  if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
6852  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
6853  return false;
6854  }
6855  }
6856 
6857  if (M.size() == NumElts*2)
6858  WhichResult = 0;
6859 
6860  return true;
6861 }
6862 
6863 // Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
6864 // that the mask elements are either all even and in steps of size 2 or all odd
6865 // and in steps of size 2.
6866 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
6867 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
6868 // v2={e,f,g,h}
6869  // Requires checks similar to those of isVTRNMask with
6870  // respect to how the results are returned.
6871 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6872  unsigned EltSz = VT.getScalarSizeInBits();
6873  if (EltSz == 64)
6874  return false;
6875 
6876  unsigned NumElts = VT.getVectorNumElements();
6877  if (M.size() != NumElts && M.size() != NumElts*2)
6878  return false;
6879 
6880  for (unsigned i = 0; i < M.size(); i += NumElts) {
6881  WhichResult = SelectPairHalf(NumElts, M, i);
6882  for (unsigned j = 0; j < NumElts; ++j) {
6883  if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
6884  return false;
6885  }
6886  }
6887 
6888  if (M.size() == NumElts*2)
6889  WhichResult = 0;
6890 
6891  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6892  if (VT.is64BitVector() && EltSz == 32)
6893  return false;
6894 
6895  return true;
6896 }
6897 
6898 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
6899 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6900 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
6901 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6902  unsigned EltSz = VT.getScalarSizeInBits();
6903  if (EltSz == 64)
6904  return false;
6905 
6906  unsigned NumElts = VT.getVectorNumElements();
6907  if (M.size() != NumElts && M.size() != NumElts*2)
6908  return false;
6909 
6910  unsigned Half = NumElts / 2;
6911  for (unsigned i = 0; i < M.size(); i += NumElts) {
6912  WhichResult = SelectPairHalf(NumElts, M, i);
6913  for (unsigned j = 0; j < NumElts; j += Half) {
6914  unsigned Idx = WhichResult;
6915  for (unsigned k = 0; k < Half; ++k) {
6916  int MIdx = M[i + j + k];
6917  if (MIdx >= 0 && (unsigned) MIdx != Idx)
6918  return false;
6919  Idx += 2;
6920  }
6921  }
6922  }
6923 
6924  if (M.size() == NumElts*2)
6925  WhichResult = 0;
6926 
6927  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6928  if (VT.is64BitVector() && EltSz == 32)
6929  return false;
6930 
6931  return true;
6932 }
6933 
6934 // Checks whether the shuffle mask represents a vector zip (VZIP) by checking
6935 // that pairs of elements of the shufflemask represent the same index in each
6936 // vector incrementing sequentially through the vectors.
6937 // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
6938 // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
6939 // v2={e,f,g,h}
6940  // Requires checks similar to those of isVTRNMask with respect to how the
6941  // results are returned.
6942 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6943  unsigned EltSz = VT.getScalarSizeInBits();
6944  if (EltSz == 64)
6945  return false;
6946 
6947  unsigned NumElts = VT.getVectorNumElements();
6948  if (M.size() != NumElts && M.size() != NumElts*2)
6949  return false;
6950 
6951  for (unsigned i = 0; i < M.size(); i += NumElts) {
6952  WhichResult = SelectPairHalf(NumElts, M, i);
6953  unsigned Idx = WhichResult * NumElts / 2;
6954  for (unsigned j = 0; j < NumElts; j += 2) {
6955  if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6956  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
6957  return false;
6958  Idx += 1;
6959  }
6960  }
6961 
6962  if (M.size() == NumElts*2)
6963  WhichResult = 0;
6964 
6965  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6966  if (VT.is64BitVector() && EltSz == 32)
6967  return false;
6968 
6969  return true;
6970 }
6971 
6972 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
6973 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6974 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
6975 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6976  unsigned EltSz = VT.getScalarSizeInBits();
6977  if (EltSz == 64)
6978  return false;
6979 
6980  unsigned NumElts = VT.getVectorNumElements();
6981  if (M.size() != NumElts && M.size() != NumElts*2)
6982  return false;
6983 
6984  for (unsigned i = 0; i < M.size(); i += NumElts) {
6985  WhichResult = SelectPairHalf(NumElts, M, i);
6986  unsigned Idx = WhichResult * NumElts / 2;
6987  for (unsigned j = 0; j < NumElts; j += 2) {
6988  if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6989  (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
6990  return false;
6991  Idx += 1;
6992  }
6993  }
6994 
6995  if (M.size() == NumElts*2)
6996  WhichResult = 0;
6997 
6998  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6999  if (VT.is64BitVector() && EltSz == 32)
7000  return false;
7001 
7002  return true;
7003 }
7004 
7005 /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7006 /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7007 static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7008  unsigned &WhichResult,
7009  bool &isV_UNDEF) {
7010  isV_UNDEF = false;
7011  if (isVTRNMask(ShuffleMask, VT, WhichResult))
7012  return ARMISD::VTRN;
7013  if (isVUZPMask(ShuffleMask, VT, WhichResult))
7014  return ARMISD::VUZP;
7015  if (isVZIPMask(ShuffleMask, VT, WhichResult))
7016  return ARMISD::VZIP;
7017 
7018  isV_UNDEF = true;
7019  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7020  return ARMISD::VTRN;
7021  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7022  return ARMISD::VUZP;
7023  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7024  return ARMISD::VZIP;
7025 
7026  return 0;
7027 }
7028 
7029 /// \return true if this is a reverse operation on a vector.
7030 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7031  unsigned NumElts = VT.getVectorNumElements();
7032  // Make sure the mask has the right size.
7033  if (NumElts != M.size())
7034  return false;
7035 
7036  // Look for <15, ..., 3, -1, 1, 0>.
7037  for (unsigned i = 0; i != NumElts; ++i)
7038  if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7039  return false;
7040 
7041  return true;
7042 }
7043 
7044 static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top) {
7045  unsigned NumElts = VT.getVectorNumElements();
7046  // Make sure the mask has the right size.
7047  if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7048  return false;
7049 
7050  // If Top
7051  // Look for <0, N, 2, N+2, 4, N+4, ..>.
7052  // This inserts Input2 into Input1
7053  // else if not Top
7054  // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7055  // This inserts Input1 into Input2
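 // e.g. for v8i16, Top matches <0, 8, 2, 10, 4, 12, 6, 14> and !Top matches
 // <0, 9, 2, 11, 4, 13, 6, 15>.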
7056  unsigned Offset = Top ? 0 : 1;
7057  for (unsigned i = 0; i < NumElts; i+=2) {
7058  if (M[i] >= 0 && M[i] != (int)i)
7059  return false;
7060  if (M[i+1] >= 0 && M[i+1] != (int)(NumElts + i + Offset))
7061  return false;
7062  }
7063 
7064  return true;
7065 }
7066 
7067 // If N is an integer constant that can be moved into a register in one
7068 // instruction, return an SDValue of such a constant (will become a MOV
7069 // instruction). Otherwise return null.
7070 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7071  const ARMSubtarget *ST, const SDLoc &dl) {
7072  uint64_t Val;
7073  if (!isa<ConstantSDNode>(N))
7074  return SDValue();
7075  Val = cast<ConstantSDNode>(N)->getZExtValue();
7076 
7077  if (ST->isThumb1Only()) {
7078  if (Val <= 255 || ~Val <= 255)
7079  return DAG.getConstant(Val, dl, MVT::i32);
7080  } else {
7081  if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7082  return DAG.getConstant(Val, dl, MVT::i32);
7083  }
7084  return SDValue();
7085 }
7086 
7087 static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7088  const ARMSubtarget *ST) {
7089  SDLoc dl(Op);
7090  EVT VT = Op.getValueType();
7091 
7092  assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7093 
7094  unsigned NumElts = VT.getVectorNumElements();
7095  unsigned BoolMask;
7096  unsigned BitsPerBool;
7097  if (NumElts == 4) {
7098  BitsPerBool = 4;
7099  BoolMask = 0xf;
7100  } else if (NumElts == 8) {
7101  BitsPerBool = 2;
7102  BoolMask = 0x3;
7103  } else if (NumElts == 16) {
7104  BitsPerBool = 1;
7105  BoolMask = 0x1;
7106  } else
7107  return SDValue();
7108 
7109  // If this is a single value copied into all lanes (a splat), we can just sign
7110  // extend that single value
7111  SDValue FirstOp = Op.getOperand(0);
7112  if (!isa<ConstantSDNode>(FirstOp) &&
7113  std::all_of(std::next(Op->op_begin()), Op->op_end(),
7114  [&FirstOp](SDUse &U) {
7115  return U.get().isUndef() || U.get() == FirstOp;
7116  })) {
7117  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7118  DAG.getValueType(MVT::i1));
7119  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7120  }
7121 
7122  // First create base with bits set where known
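 // (e.g. building v4i1 <1, 0, 1, 1> gives Bits32 = 0xff0f: each bool is
 // replicated into a 4-bit field of the 16-bit lane mask MVE keeps in
 // VPR.P0.)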
7123  unsigned Bits32 = 0;
7124  for (unsigned i = 0; i < NumElts; ++i) {
7125  SDValue V = Op.getOperand(i);
7126  if (!isa<ConstantSDNode>(V) && !V.isUndef())
7127  continue;
7128  bool BitSet = V.isUndef() ? false : cast<ConstantSDNode>(V)->getZExtValue();
7129  if (BitSet)
7130  Bits32 |= BoolMask << (i * BitsPerBool);
7131  }
7132 
7133  // Add in unknown nodes
7134  SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7135  DAG.getConstant(Bits32, dl, MVT::i32));
7136  for (unsigned i = 0; i < NumElts; ++i) {
7137  SDValue V = Op.getOperand(i);
7138  if (isa<ConstantSDNode>(V) || V.isUndef())
7139  continue;
7140  Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7141  DAG.getConstant(i, dl, MVT::i32));
7142  }
7143 
7144  return Base;
7145 }
7146 
7147 // If this is a case we can't handle, return null and let the default
7148 // expansion code take care of it.
7149 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7150  const ARMSubtarget *ST) const {
7151  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7152  SDLoc dl(Op);
7153  EVT VT = Op.getValueType();
7154 
7155  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7156  return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7157 
7158  APInt SplatBits, SplatUndef;
7159  unsigned SplatBitSize;
7160  bool HasAnyUndefs;
7161  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7162  if (SplatUndef.isAllOnesValue())
7163  return DAG.getUNDEF(VT);
7164 
7165  if ((ST->hasNEON() && SplatBitSize <= 64) ||
7166  (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
7167  // Check if an immediate VMOV works.
7168  EVT VmovVT;
7169  SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
7170  SplatUndef.getZExtValue(), SplatBitSize,
7171  DAG, dl, VmovVT, VT.is128BitVector(),
7172  VMOVModImm);
7173 
7174  if (Val.getNode()) {
7175  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7176  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7177  }
7178 
7179  // Try an immediate VMVN.
7180  uint64_t NegatedImm = (~SplatBits).getZExtValue();
7181  Val = isVMOVModifiedImm(
7182  NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
7183  DAG, dl, VmovVT, VT.is128BitVector(),
7184  ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7185  if (Val.getNode()) {
7186  SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7187  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7188  }
7189 
7190  // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7191  if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7192  int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7193  if (ImmVal != -1) {
7194  SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7195  return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7196  }
7197  }
7198  }
7199  }
7200 
7201  // Scan through the operands to see if only one value is used.
7202  //
7203  // As an optimisation, even if more than one value is used it may be more
7204  // profitable to splat with one value then change some lanes.
7205  //
7206  // Heuristically we decide to do this if the vector has a "dominant" value,
7207  // defined as splatted to more than half of the lanes.
7208  unsigned NumElts = VT.getVectorNumElements();
7209  bool isOnlyLowElement = true;
7210  bool usesOnlyOneValue = true;
7211  bool hasDominantValue = false;
7212  bool isConstant = true;
7213 
7214  // Map of the number of times a particular SDValue appears in the
7215  // element list.
7216  DenseMap<SDValue, unsigned> ValueCounts;
7217  SDValue Value;
7218  for (unsigned i = 0; i < NumElts; ++i) {
7219  SDValue V = Op.getOperand(i);
7220  if (V.isUndef())
7221  continue;
7222  if (i > 0)
7223  isOnlyLowElement = false;
7224  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7225  isConstant = false;
7226 
7227  ValueCounts.insert(std::make_pair(V, 0));
7228  unsigned &Count = ValueCounts[V];
7229 
7230  // Is this value dominant? (takes up more than half of the lanes)
7231  if (++Count > (NumElts / 2)) {
7232  hasDominantValue = true;
7233  Value = V;
7234  }
7235  }
7236  if (ValueCounts.size() != 1)
7237  usesOnlyOneValue = false;
7238  if (!Value.getNode() && !ValueCounts.empty())
7239  Value = ValueCounts.begin()->first;
7240 
7241  if (ValueCounts.empty())
7242  return DAG.getUNDEF(VT);
7243 
7244  // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
7245  // Keep going if we are hitting this case.
7246  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
7247  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
7248 
7249  unsigned EltSize = VT.getScalarSizeInBits();
7250 
7251  // Use VDUP for non-constant splats. For f32 constant splats, reduce to
7252  // i32 and try again.
7253  if (hasDominantValue && EltSize <= 32) {
7254  if (!isConstant) {
7255  SDValue N;
7256 
7257  // If we are VDUPing a value that comes directly from a vector, that will
7258  // cause an unnecessary move to and from a GPR, where instead we could
7259  // just use VDUPLANE. We can only do this if the lane being extracted
7260  // is at a constant index, as the VDUP from lane instructions only have
7261  // constant-index forms.
7262  ConstantSDNode *constIndex;
7263  if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7264  (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
7265  // We need to create a new undef vector to use for the VDUPLANE if the
7266  // size of the vector from which we get the value is different than the
7267  // size of the vector that we need to create. We will insert the element
7268  // such that the register coalescer will remove unnecessary copies.
7269  if (VT != Value->getOperand(0).getValueType()) {
7270  unsigned index = constIndex->getAPIntValue().getLimitedValue() %
7271  VT.getVectorNumElements();
7272  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7273  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
7274  Value, DAG.getConstant(index, dl, MVT::i32)),
7275  DAG.getConstant(index, dl, MVT::i32));
7276  } else
7277  N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7278  Value->getOperand(0), Value->getOperand(1));
7279  } else
7280  N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
7281 
7282  if (!usesOnlyOneValue) {
7283  // The dominant value was splatted as 'N', but we now have to insert
7284  // all differing elements.
7285  for (unsigned I = 0; I < NumElts; ++I) {
7286  if (Op.getOperand(I) == Value)
7287  continue;
7288  SmallVector<SDValue, 3> Ops;
7289  Ops.push_back(N);
7290  Ops.push_back(Op.getOperand(I));
7291  Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
7292  N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
7293  }
7294  }
7295  return N;
7296  }
7297  if (VT.getVectorElementType().isFloatingPoint()) {
7298  SmallVector<SDValue, 8> Ops;
7299  MVT FVT = VT.getVectorElementType().getSimpleVT();
7300  assert(FVT == MVT::f32 || FVT == MVT::f16);
7301  MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
7302  for (unsigned i = 0; i < NumElts; ++i)
7303  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
7304  Op.getOperand(i)));
7305  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
7306  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
7307  Val = LowerBUILD_VECTOR(Val, DAG, ST);
7308  if (Val.getNode())
7309  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7310  }
7311  if (usesOnlyOneValue) {
7312  SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
7313  if (isConstant && Val.getNode())
7314  return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
7315  }
7316  }
7317 
7318  // If all elements are constants and the case above didn't get hit, fall back
7319  // to the default expansion, which will generate a load from the constant
7320  // pool.
7321  if (isConstant)
7322  return SDValue();
7323 
7324  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
7325  if (NumElts >= 4) {
7326  SDValue shuffle = ReconstructShuffle(Op, DAG);
7327  if (shuffle != SDValue())
7328  return shuffle;
7329  }
7330 
7331  if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
7332  // If we haven't found an efficient lowering, try splitting a 128-bit vector
7333  // into two 64-bit vectors; we might discover a better way to lower it.
7334  SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
7335  EVT ExtVT = VT.getVectorElementType();
7336  EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
7337  SDValue Lower =
7338  DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
7339  if (Lower.getOpcode() == ISD::BUILD_VECTOR)
7340  Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
7341  SDValue Upper = DAG.getBuildVector(
7342  HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
7343  if (Upper.getOpcode() == ISD::BUILD_VECTOR)
7344  Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
7345  if (Lower && Upper)
7346  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
7347  }
7348 
7349  // Vectors with 32- or 64-bit elements can be built by directly assigning
7350  // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
7351  // will be legalized.
7352  if (EltSize >= 32) {
7353  // Do the expansion with floating-point types, since that is what the VFP
7354  // registers are defined to use, and since i64 is not legal.
7355  EVT EltVT = EVT::getFloatingPointVT(EltSize);
7356  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
7357  SmallVector<SDValue, 8> Ops;
7358  for (unsigned i = 0; i < NumElts; ++i)
7359  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
7360  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
7361  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7362  }
7363 
7364  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
7365  // know the default expansion would otherwise fall back on something even
7366  // worse. For a vector with one or two non-undef values, that's
7367  // scalar_to_vector for the elements followed by a shuffle (provided the
7368  // shuffle is valid for the target); for everything else, it's element-by-
7369  // element materialization on the stack followed by a load.
7370  if (!isConstant && !usesOnlyOneValue) {
7371  SDValue Vec = DAG.getUNDEF(VT);
7372  for (unsigned i = 0; i < NumElts; ++i) {
7373  SDValue V = Op.getOperand(i);
7374  if (V.isUndef())
7375  continue;
7376  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
7377  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
7378  }
7379  return Vec;
7380  }
7381 
7382  return SDValue();
7383 }
7384 
7385 // Gather data to see if the operation can be modelled as a
7386 // shuffle in combination with VEXTs.
7387 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
7388  SelectionDAG &DAG) const {
7389  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7390  SDLoc dl(Op);
7391  EVT VT = Op.getValueType();
7392  unsigned NumElts = VT.getVectorNumElements();
7393 
7394  struct ShuffleSourceInfo {
7395  SDValue Vec;
7396  unsigned MinElt = std::numeric_limits<unsigned>::max();
7397  unsigned MaxElt = 0;
7398 
7399  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
7400  // be compatible with the shuffle we intend to construct. As a result
7401  // ShuffleVec will be some sliding window into the original Vec.
7402  SDValue ShuffleVec;
7403 
7404  // Code should guarantee that element i in Vec starts at element "WindowBase
7405  // + i * WindowScale in ShuffleVec".
7406  int WindowBase = 0;
7407  int WindowScale = 1;
7408 
7409  ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
7410 
7411  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
7412  };
7413 
7414  // First gather all vectors used as an immediate source for this BUILD_VECTOR
7415  // node.
7416  SmallVector<ShuffleSourceInfo, 2> Sources;
7417  for (unsigned i = 0; i < NumElts; ++i) {
7418  SDValue V = Op.getOperand(i);
7419  if (V.isUndef())
7420  continue;
7421  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
7422  // A shuffle can only come from building a vector from various
7423  // elements of other vectors.
7424  return SDValue();
7425  } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
7426  // Furthermore, shuffles require a constant mask, whereas extractelts
7427  // accept variable indices.
7428  return SDValue();
7429  }
7430 
7431  // Add this element source to the list if it's not already there.
7432  SDValue SourceVec = V.getOperand(0);
7433  auto Source = llvm::find(Sources, SourceVec);
7434  if (Source == Sources.end())
7435  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
7436 
7437  // Update the minimum and maximum lane number seen.
7438  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
7439  Source->MinElt = std::min(Source->MinElt, EltNo);
7440  Source->MaxElt = std::max(Source->MaxElt, EltNo);
7441  }
7442 
7443  // Currently only do something sane when at most two source vectors
7444  // are involved.
7445  if (Sources.size() > 2)
7446  return SDValue();
7447 
7448  // Find out the smallest element size among result and two sources, and use
7449  // it as element size to build the shuffle_vector.
7450  EVT SmallestEltTy = VT.getVectorElementType();
7451  for (auto &Source : Sources) {
7452  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
7453  if (SrcEltTy.bitsLT(SmallestEltTy))
7454  SmallestEltTy = SrcEltTy;
7455  }
7456  unsigned ResMultiplier =
7457  VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
7458  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
7459  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
7460 
7461  // If the source vector is too wide or too narrow, we may nevertheless be able
7462  // to construct a compatible shuffle either by concatenating it with UNDEF or
7463  // extracting a suitable range of elements.
7464  for (auto &Src : Sources) {
7465  EVT SrcVT = Src.ShuffleVec.getValueType();
7466 
7467  if (SrcVT.getSizeInBits() == VT.getSizeInBits())
7468  continue;
7469 
7470  // This stage of the search produces a source with the same element type as
7471  // the original, but with a total width matching the BUILD_VECTOR output.
7472  EVT EltVT = SrcVT.getVectorElementType();
7473  unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
7474  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
7475 
7476  if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
7477  if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
7478  return SDValue();
7479  // We can pad out the smaller vector for free, so if it's part of a
7480  // shuffle...
7481  Src.ShuffleVec =
7482  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
7483  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
7484  continue;
7485  }
7486 
7487  if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
7488  return SDValue();
7489 
7490  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
7491  // Span too large for a VEXT to cope
7492  return SDValue();
7493  }
7494 
7495  if (Src.MinElt >= NumSrcElts) {
7496  // The extraction can just take the second half
7497  Src.ShuffleVec =
7498  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7499  DAG.getConstant(NumSrcElts, dl, MVT::i32));
7500  Src.WindowBase = -NumSrcElts;
7501  } else if (Src.MaxElt < NumSrcElts) {
7502  // The extraction can just take the first half
7503  Src.ShuffleVec =
7504  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7505  DAG.getConstant(0, dl, MVT::i32));
7506  } else {
7507  // An actual VEXT is needed
7508  SDValue VEXTSrc1 =
7509  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7510  DAG.getConstant(0, dl, MVT::i32));
7511  SDValue VEXTSrc2 =
7512  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7513  DAG.getConstant(NumSrcElts, dl, MVT::i32));
7514 
7515  Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
7516  VEXTSrc2,
7517  DAG.getConstant(Src.MinElt, dl, MVT::i32));
7518  Src.WindowBase = -Src.MinElt;
7519  }
7520  }
7521 
7522  // Another possible incompatibility occurs from the vector element types. We
7523  // can fix this by bitcasting the source vectors to the same type we intend
7524  // for the shuffle.
7525  for (auto &Src : Sources) {
7526  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
7527  if (SrcEltTy == SmallestEltTy)
7528  continue;
7529  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
7530  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
7531  Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
7532  Src.WindowBase *= Src.WindowScale;
7533  }
7534 
7535  // Final sanity check before we try to actually produce a shuffle.
7536  LLVM_DEBUG(for (auto Src
7537  : Sources)
7538  assert(Src.ShuffleVec.getValueType() == ShuffleVT););
7539 
7540  // The stars all align, our next step is to produce the mask for the shuffle.
7541  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
7542  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
7543  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
7544  SDValue Entry = Op.getOperand(i);
7545  if (Entry.isUndef())
7546  continue;
7547 
7548  auto Src = llvm::find(Sources, Entry.getOperand(0));
7549  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
7550 
7551  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
7552  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
7553  // segment.
7554  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
7555  int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
7556  VT.getScalarSizeInBits());
7557  int LanesDefined = BitsDefined / BitsPerShuffleLane;
7558 
7559  // This source is expected to fill ResMultiplier lanes of the final shuffle,
7560  // starting at the appropriate offset.
7561  int *LaneMask = &Mask[i * ResMultiplier];
7562 
7563  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
7564  ExtractBase += NumElts * (Src - Sources.begin());
7565  for (int j = 0; j < LanesDefined; ++j)
7566  LaneMask[j] = ExtractBase + j;
7567  }
7568 
7569 
7570  // We can't handle more than two sources. This should have already
7571  // been checked before this point.
7572  assert(Sources.size() <= 2 && "Too many sources!");
7573 
7574  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
7575  for (unsigned i = 0; i < Sources.size(); ++i)
7576  ShuffleOps[i] = Sources[i].ShuffleVec;
7577 
7578  SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
7579  ShuffleOps[1], Mask, DAG);
7580  if (!Shuffle)
7581  return SDValue();
7582  return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
7583 }
7584 
7585 enum ShuffleOpCodes {
7586  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7587  OP_VREV,
7588  OP_VDUP0,
7589  OP_VDUP1,
7590  OP_VDUP2,
7591  OP_VDUP3,
7592  OP_VEXT1,
7593  OP_VEXT2,
7594  OP_VEXT3,
7595  OP_VUZPL, // VUZP, left result
7596  OP_VUZPR, // VUZP, right result
7597  OP_VZIPL, // VZIP, left result
7598  OP_VZIPR, // VZIP, right result
7599  OP_VTRNL, // VTRN, left result
7600  OP_VTRNR // VTRN, right result
7601 };
7602 
7603 static bool isLegalMVEShuffleOp(unsigned PFEntry) {
7604  unsigned OpNum = (PFEntry >> 26) & 0x0F;
7605  switch (OpNum) {
7606  case OP_COPY:
7607  case OP_VREV:
7608  case OP_VDUP0:
7609  case OP_VDUP1:
7610  case OP_VDUP2:
7611  case OP_VDUP3:
7612  return true;
7613  }
7614  return false;
7615 }
7616 
7617 /// isShuffleMaskLegal - Targets can use this to indicate that they only
7618 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
7619 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
7620 /// are assumed to be legal.
7621 bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
7622  if (VT.getVectorNumElements() == 4 &&
7623  (VT.is128BitVector() || VT.is64BitVector())) {
7624  unsigned PFIndexes[4];
7625  for (unsigned i = 0; i != 4; ++i) {
7626  if (M[i] < 0)
7627  PFIndexes[i] = 8;
7628  else
7629  PFIndexes[i] = M[i];
7630  }
7631 
7632  // Compute the index in the perfect shuffle table.
7633  unsigned PFTableIndex =
7634  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7635  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7636  unsigned Cost = (PFEntry >> 30);
7637 
7638  if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
7639  return true;
7640  }
7641 
7642  bool ReverseVEXT, isV_UNDEF;
7643  unsigned Imm, WhichResult;
7644 
7645  unsigned EltSize = VT.getScalarSizeInBits();
7646  if (EltSize >= 32 ||
7647  ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
7649  isVREVMask(M, VT, 64) ||
7650  isVREVMask(M, VT, 32) ||
7651  isVREVMask(M, VT, 16))
7652  return true;
7653  else if (Subtarget->hasNEON() &&
7654  (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
7655  isVTBLMask(M, VT) ||
7656  isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
7657  return true;
7658  else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) &&
7659  isReverseMask(M, VT))
7660  return true;
7661  else if (Subtarget->hasMVEIntegerOps() &&
7662  (isVMOVNMask(M, VT, 0) || isVMOVNMask(M, VT, 1)))
7663  return true;
7664  else
7665  return false;
7666 }
7667 
7668 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7669 /// the specified operations to build the shuffle.
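/// Each PFEntry packs the cost in bits 31-30, the opcode in bits 29-26 and
/// two 13-bit operand IDs in bits 25-13 and 12-0, where an ID encodes four
/// shuffle indices as base-9 digits (8 meaning undef), as in
/// isShuffleMaskLegal above.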
7670 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7671  SDValue RHS, SelectionDAG &DAG,
7672  const SDLoc &dl) {
7673  unsigned OpNum = (PFEntry >> 26) & 0x0F;
7674  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7675  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
7676 
7677  if (OpNum == OP_COPY) {
7678  if (LHSID == (1*9+2)*9+3) return LHS;
7679  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7680  return RHS;
7681  }
7682 
7683  SDValue OpLHS, OpRHS;
7684  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7685  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7686  EVT VT = OpLHS.getValueType();
7687 
7688  switch (OpNum) {
7689  default: llvm_unreachable("Unknown shuffle opcode!");
7690  case OP_VREV:
7691  // VREV divides the vector in half and swaps within the half.
7692  if (VT.getVectorElementType() == MVT::i32 ||
7693  VT.getVectorElementType() == MVT::f32)
7694  return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
7695  // vrev <4 x i16> -> VREV32
7696  if (VT.getVectorElementType() == MVT::i16)
7697  return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
7698  // vrev <4 x i8> -> VREV16
7699  assert(VT.getVectorElementType() == MVT::i8);
7700  return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
7701  case OP_VDUP0:
7702  case OP_VDUP1:
7703  case OP_VDUP2:
7704  case OP_VDUP3:
7705  return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7706  OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
7707  case OP_VEXT1:
7708  case OP_VEXT2:
7709  case OP_VEXT3:
7710  return DAG.getNode(ARMISD::VEXT, dl, VT,
7711  OpLHS, OpRHS,
7712  DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
7713  case OP_VUZPL:
7714  case OP_VUZPR:
7715  return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
7716  OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
7717  case OP_VZIPL:
7718  case OP_VZIPR:
7719  return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
7720  OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
7721  case OP_VTRNL:
7722  case OP_VTRNR:
7723  return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
7724  OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
7725  }
7726 }
7727 
7728 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
7729  ArrayRef<int> ShuffleMask,
7730  SelectionDAG &DAG) {
7731  // Check to see if we can use the VTBL instruction.
7732  SDValue V1 = Op.getOperand(0);
7733  SDValue V2 = Op.getOperand(1);
7734  SDLoc DL(Op);
7735 
7736  SmallVector<SDValue, 8> VTBLMask;
7737  for (ArrayRef<int>::iterator
7738  I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
7739  VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
7740 
7741  if (V2.getNode()->isUndef())
7742  return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
7743  DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
7744 
7745  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
7746  DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
7747 }
7748 
7749 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
7750  SelectionDAG &DAG) {
7751  SDLoc DL(Op);
7752  SDValue OpLHS = Op.getOperand(0);
7753  EVT VT = OpLHS.getValueType();
7754 
7755  assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
7756  "Expect a v8i16/v16i8 type");
7757  OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
7758  // For a v16i8 type: after the VREV64 we have <7, ..., 0, 15, ..., 8>. Now
7759  // extract the first 8 bytes into the top double word and the last 8 bytes
7760  // into the bottom double word. The v8i16 case is similar.
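 // For example, for a v8i16 <0,...,7> the VREV64 gives <3,2,1,0,7,6,5,4>,
 // and the VEXT #4 below then swaps the two double words, yielding the
 // fully reversed <7,6,5,4,3,2,1,0>.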
7761  unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
7762  return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
7763  DAG.getConstant(ExtractNum, DL, MVT::i32));
7764 }
7765 
7766 static EVT getVectorTyFromPredicateVector(EVT VT) {
7767  switch (VT.getSimpleVT().SimpleTy) {
7768  case MVT::v4i1:
7769  return MVT::v4i32;
7770  case MVT::v8i1:
7771  return MVT::v8i16;
7772  case MVT::v16i1:
7773  return MVT::v16i8;
7774  default:
7775  llvm_unreachable("Unexpected vector predicate type");
7776  }
7777 }
7778 
7779 static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
7780  SelectionDAG &DAG) {
7781  // Converting from boolean predicates to integers involves creating a vector
7782  // of all ones or all zeroes and selecting the lanes based upon the real
7783  // predicate.
7784  SDValue AllOnes =
7785  DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
7786  AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
7787 
7788  SDValue AllZeroes =
7789  DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
7790  AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
7791 
7792  // Get full vector type from predicate type
7793  EVT NewVT = getVectorTyFromPredicateVector(VT);
7794 
7795  SDValue RecastV1;
7796  // If the real predicate is a v8i1 or v4i1 (not v16i1) then we need to recast
7797  // this to a v16i1. This cannot be done with an ordinary bitcast because the
7798  // sizes are not the same. We have to use an MVE-specific PREDICATE_CAST node,
7799  // since we know in hardware the sizes are really the same.
7800  if (VT != MVT::v16i1)
7801  RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
7802  else
7803  RecastV1 = Pred;
7804 
7805  // Select either all ones or zeroes depending upon the real predicate bits.
7806  SDValue PredAsVector =
7807  DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
7808 
7809  // Recast our new predicate-as-integer v16i8 vector into something
7810  // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
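 // For example, a v4i1 predicate <1,0,1,1> becomes, after the VSELECT and
 // bitcast below, the v4i32 vector <-1, 0, -1, -1>.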
7811  return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
7812 }
7813 
7814 static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
7815  const ARMSubtarget *ST) {
7816  EVT VT = Op.getValueType();
7817  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
7818  ArrayRef<int> ShuffleMask = SVN->getMask();
7819 
7820  assert(ST->hasMVEIntegerOps() &&
7821  "No support for vector shuffle of boolean predicates");
7822 
7823  SDValue V1 = Op.getOperand(0);
7824  SDLoc dl(Op);
7825  if (isReverseMask(ShuffleMask, VT)) {
7826  SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
7827  SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
7828  SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
7829  DAG.getConstant(16, dl, MVT::i32));
7830  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
7831  }
7832 
7833  // Until we can come up with optimised cases for every single vector
7834  // shuffle in existence we have chosen the least painful strategy. This is
7835  // to essentially promote the boolean predicate to an 8-bit integer, where
7836  // each predicate represents a byte. Then we fall back on a normal integer
7837  // vector shuffle and convert the result back into a predicate vector. In
7838  // many cases the generated code might be even better than scalar code
7839  // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
7840  // fields in a register into 8 other arbitrary 2-bit fields!
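 // In effect: a v8i1 becomes a v8i16 of all-ones/all-zero lanes, is shuffled
 // as an ordinary integer vector, and the VCMPZ(NE) below turns the non-zero
 // lanes back into a v8i1 predicate.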
7841  SDValue PredAsVector = PromoteMVEPredVector(dl, V1, VT, DAG);
7842  EVT NewVT = PredAsVector.getValueType();
7843 
7844  // Do the shuffle!
7845  SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector,
7846  DAG.getUNDEF(NewVT), ShuffleMask);
7847 
7848  // Now return the result of comparing the shuffled vector with zero,
7849  // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
7850  return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
7851  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
7852 }
7853 
7854 static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
7855  ArrayRef<int> ShuffleMask,
7856  SelectionDAG &DAG) {
7857  // Attempt to lower the vector shuffle using as many whole register movs as
7858  // possible. This is useful for types smaller than 32 bits, which would
7859  // often otherwise become a series of GPR movs.
7860  SDLoc dl(Op);
7861  EVT VT = Op.getValueType();
7862  if (VT.getScalarSizeInBits() >= 32)
7863  return SDValue();
7864 
7865  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
7866  "Unexpected vector type");
7867  int NumElts = VT.getVectorNumElements();
7868  int QuarterSize = NumElts / 4;
7869  // The four final parts of the vector, as i32's
7870  SDValue Parts[4];
7871 
7872  // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
7873  // <u,u,u,u>), returning the vmov lane index
7874  auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
7875  // Detect which mov lane this would be from the first non-undef element.
7876  int MovIdx = -1;
7877  for (int i = 0; i < Length; i++) {
7878  if (ShuffleMask[Start + i] >= 0) {
7879  if (ShuffleMask[Start + i] % Length != i)
7880  return -1;
7881  MovIdx = ShuffleMask[Start + i] / Length;
7882  break;
7883  }
7884  }
7885  // If all items are undef, leave this for other combines
7886  if (MovIdx == -1)
7887  return -1;
7888  // Check the remaining values are the correct part of the same mov
7889  for (int i = 1; i < Length; i++) {
7890  if (ShuffleMask[Start + i] >= 0 &&
7891  (ShuffleMask[Start + i] / Length != MovIdx ||
7892  ShuffleMask[Start + i] % Length != i))
7893  return -1;
7894  }
7895  return MovIdx;
7896  };
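 // For example, for v8i16 (QuarterSize == 2) the mask chunk <6,7> is lane 3
 // of the inputs viewed as v4i32; <10,11> yields 5, which the Elt >= 4 check
 // below turns into lane 1 of the second operand.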
7897 
7898  for (int Part = 0; Part < 4; ++Part) {
7899  // Does this part look like a mov
7900  int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
7901  if (Elt != -1) {
7902  SDValue Input = Op->getOperand(0);
7903  if (Elt >= 4) {
7904  Input = Op->getOperand(1);
7905  Elt -= 4;
7906  }
7907  SDValue BitCast = DAG.getBitcast(MVT::v4i32, Input);
7908  Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, BitCast,
7909  DAG.getConstant(Elt, dl, MVT::i32));
7910  }
7911  }
7912 
7913  // Nothing interesting found, just return
7914  if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
7915  return SDValue();
7916 
7917  // The other parts need to be built with the old shuffle vector, cast to a
7918  // v4i32 and extract_vector_elts
7919  if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
7920  SmallVector<int, 16> NewShuffleMask;
7921  for (int Part = 0; Part < 4; ++Part)
7922  for (int i = 0; i < QuarterSize; i++)
7923  NewShuffleMask.push_back(
7924  Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
7925  SDValue NewShuffle = DAG.getVectorShuffle(
7926  VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
7927  SDValue BitCast = DAG.getBitcast(MVT::v4i32, NewShuffle);
7928 
7929  for (int Part = 0; Part < 4; ++Part)
7930  if (!Parts[Part])
7931  Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
7932  BitCast, DAG.getConstant(Part, dl, MVT::i32));
7933  }
7934  // Build a vector out of the various parts and bitcast it back to the original
7935  // type.
7936  SDValue NewVec = DAG.getBuildVector(MVT::v4i32, dl, Parts);
7937  return DAG.getBitcast(VT, NewVec);
7938 }
7939 
7940 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
7941  const ARMSubtarget *ST) {
7942  SDValue V1 = Op.getOperand(0);
7943  SDValue V2 = Op.getOperand(1);
7944  SDLoc dl(Op);
7945  EVT VT = Op.getValueType();
7946  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
7947  unsigned EltSize = VT.getScalarSizeInBits();
7948 
7949  if (ST->hasMVEIntegerOps() && EltSize == 1)
7950  return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
7951 
7952  // Convert shuffles that are directly supported on NEON to target-specific
7953  // DAG nodes, instead of keeping them as shuffles and matching them again
7954  // during code selection. This is more efficient and avoids the possibility
7955  // of inconsistencies between legalization and selection.
7956  // FIXME: floating-point vectors should be canonicalized to integer vectors
7957  // of the same type so that they get CSEd properly.
7958  ArrayRef<int> ShuffleMask = SVN->getMask();
7959 
7960  if (EltSize <= 32) {
7961  if (SVN->isSplat()) {
7962  int Lane = SVN->getSplatIndex();
7963  // If this is an undef splat, generate it via "just" vdup, if possible.
7964  if (Lane == -1) Lane = 0;
7965 
7966  // Test if V1 is a SCALAR_TO_VECTOR.
7967  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
7968  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
7969  }
7970  // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
7971  // (and probably will turn into a SCALAR_TO_VECTOR once legalization
7972  // reaches it).
7973  if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
7974  !isa<ConstantSDNode>(V1.getOperand(0))) {
7975  bool IsScalarToVector = true;
7976  for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
7977  if (!V1.getOperand(i).isUndef()) {
7978  IsScalarToVector = false;
7979  break;
7980  }
7981  if (IsScalarToVector)
7982  return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
7983  }
7984  return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
7985  DAG.getConstant(Lane, dl, MVT::i32));
7986  }
7987 
7988  bool ReverseVEXT = false;
7989  unsigned Imm = 0;
7990  if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
7991  if (ReverseVEXT)
7992  std::swap(V1, V2);
7993  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
7994  DAG.getConstant(Imm, dl, MVT::i32));
7995  }
7996 
7997  if (isVREVMask(ShuffleMask, VT, 64))
7998  return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
7999  if (isVREVMask(ShuffleMask, VT, 32))
8000  return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8001  if (isVREVMask(ShuffleMask, VT, 16))
8002  return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8003 
8004  if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8005  return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8006  DAG.getConstant(Imm, dl, MVT::i32));
8007  }
8008 
8009  // Check for Neon shuffles that modify both input vectors in place.
8010  // If both results are used, i.e., if there are two shuffles with the same
8011  // source operands and with masks corresponding to both results of one of
8012  // these operations, DAG memoization will ensure that a single node is
8013  // used for both shuffles.
8014  unsigned WhichResult = 0;
8015  bool isV_UNDEF = false;
8016  if (ST->hasNEON()) {
8017  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8018  ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8019  if (isV_UNDEF)
8020  V2 = V1;
8021  return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8022  .getValue(WhichResult);
8023  }
8024  }
8025  if (ST->hasMVEIntegerOps()) {
8026  if (isVMOVNMask(ShuffleMask, VT, 0))
8027  return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8028  DAG.getConstant(0, dl, MVT::i32));
8029  if (isVMOVNMask(ShuffleMask, VT, 1))
8030  return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8031  DAG.getConstant(1, dl, MVT::i32));
8032  }
8033 
8034  // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8035  // shuffles that produce a result larger than their operands with:
8036  // shuffle(concat(v1, undef), concat(v2, undef))
8037  // ->
8038  // shuffle(concat(v1, v2), undef)
8039  // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8040  //
8041  // This is useful in the general case, but there are special cases where
8042  // native shuffles produce larger results: the two-result ops.
8043  //
8044  // Look through the concat when lowering them:
8045  // shuffle(concat(v1, v2), undef)
8046  // ->
8047  // concat(VZIP(v1, v2):0, :1)
8048  //
8049  if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8050  SDValue SubV1 = V1->getOperand(0);
8051  SDValue SubV2 = V1->getOperand(1);
8052  EVT SubVT = SubV1.getValueType();
8053 
8054  // We expect these to have been canonicalized to -1.
8055  assert(llvm::all_of(ShuffleMask, [&](int i) {
8056  return i < (int)VT.getVectorNumElements();
8057  }) && "Unexpected shuffle index into UNDEF operand!");
8058 
8059  if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8060  ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8061  if (isV_UNDEF)
8062  SubV2 = SubV1;
8063  assert((WhichResult == 0) &&
8064  "In-place shuffle of concat can only have one result!");
8065  SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8066  SubV1, SubV2);
8067  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8068  Res.getValue(1));
8069  }
8070  }
8071  }
8072 
8073  // If the shuffle is not directly supported and it has 4 elements, use
8074  // the PerfectShuffle-generated table to synthesize it from other shuffles.
8075  unsigned NumElts = VT.getVectorNumElements();
8076  if (NumElts == 4) {
8077  unsigned PFIndexes[4];
8078  for (unsigned i = 0; i != 4; ++i) {
8079  if (ShuffleMask[i] < 0)
8080  PFIndexes[i] = 8;
8081  else
8082  PFIndexes[i] = ShuffleMask[i];
8083  }
8084 
8085  // Compute the index in the perfect shuffle table.
8086  unsigned PFTableIndex =
8087  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
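 // The index is the mask read as a 4-digit base-9 number; e.g. the mask
 // <0,0,1,1> gives 0*729 + 0*81 + 1*9 + 1 = 10. The top two bits of each
 // table entry hold the cost of the expansion.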
8088  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8089  unsigned Cost = (PFEntry >> 30);
8090 
8091  if (Cost <= 4) {
8092  if (ST->hasNEON())
8093  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8094  else if (isLegalMVEShuffleOp(PFEntry)) {
8095  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8096  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8097  unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8098  unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8099  if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8100  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8101  }
8102  }
8103  }
8104 
8105  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8106  if (EltSize >= 32) {
8107  // Do the expansion with floating-point types, since that is what the VFP
8108  // registers are defined to use, and since i64 is not legal.
8109  EVT EltVT = EVT::getFloatingPointVT(EltSize);
8110  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8111  V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8112  V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8113  SmallVector<SDValue, 8> Ops;
8114  for (unsigned i = 0; i < NumElts; ++i) {
8115  if (ShuffleMask[i] < 0)
8116  Ops.push_back(DAG.getUNDEF(EltVT));
8117  else
8118  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8119  ShuffleMask[i] < (int)NumElts ? V1 : V2,
8120  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8121  dl, MVT::i32)));
8122  }
8123  SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8124  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8125  }
8126 
8127  if (ST->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
8128  return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
8129 
8130  if (ST->hasNEON() && VT == MVT::v8i8)
8131  if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
8132  return NewOp;
8133 
8134  if (ST->hasMVEIntegerOps())
8135  if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
8136  return NewOp;
8137 
8138  return SDValue();
8139 }
8140 
8141 static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8142  const ARMSubtarget *ST) {
8143  EVT VecVT = Op.getOperand(0).getValueType();
8144  SDLoc dl(Op);
8145 
8146  assert(ST->hasMVEIntegerOps() &&
8147  "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8148 
8149  SDValue Conv =
8150  DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8151  unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
8152  unsigned LaneWidth =
8153  getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
8154  unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
8155  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
8156  Op.getOperand(1), DAG.getValueType(MVT::i1));
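 // Each i1 lane occupies LaneWidth bits of the 16-bit predicate register;
 // e.g. for v8i1, LaneWidth is 2 and inserting into lane 3 gives
 // Mask = 0b11 << 6. The BFI below merges the sign-extended bit into
 // exactly those bits of Conv.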
8157  SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
8158  DAG.getConstant(~Mask, dl, MVT::i32));
8159  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
8160 }
8161 
8162 SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8163  SelectionDAG &DAG) const {
8164  // INSERT_VECTOR_ELT is legal only for immediate indexes.
8165  SDValue Lane = Op.getOperand(2);
8166  if (!isa<ConstantSDNode>(Lane))
8167  return SDValue();
8168 
8169  SDValue Elt = Op.getOperand(1);
8170  EVT EltVT = Elt.getValueType();
8171 
8172  if (Subtarget->hasMVEIntegerOps() &&
8173  Op.getValueType().getScalarSizeInBits() == 1)
8174  return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
8175 
8176  if (getTypeAction(*DAG.getContext(), EltVT) ==
8177  TargetLowering::TypePromoteFloat) {
8178  // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
8179  // but the type system will try to do that if we don't intervene.
8180  // Reinterpret any such vector-element insertion as one with the
8181  // corresponding integer types.
8182 
8183  SDLoc dl(Op);
8184 
8185  EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
8186  assert(getTypeAction(*DAG.getContext(), IEltVT) !=
8187  TargetLowering::TypePromoteFloat);
8188 
8189  SDValue VecIn = Op.getOperand(0);
8190  EVT VecVT = VecIn.getValueType();
8191  EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
8192  VecVT.getVectorNumElements());
8193 
8194  SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
8195  SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
8196  SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
8197  IVecIn, IElt, Lane);
8198  return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
8199  }
8200 
8201  return Op;
8202 }
8203 
8204 static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8205  const ARMSubtarget *ST) {
8206  EVT VecVT = Op.getOperand(0).getValueType();
8207  SDLoc dl(Op);
8208 
8209  assert(ST->hasMVEIntegerOps() &&
8210  "LowerEXTRACT_VECTOR_ELT_i1 called without MVE!");
8211 
8212  SDValue Conv =
8213  DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8214  unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
8215  unsigned LaneWidth =
8216  getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
8217  SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
8218  DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
8219  return Shift;
8220 }
8221 
8222 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
8223  const ARMSubtarget *ST) {
8224  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
8225  SDValue Lane = Op.getOperand(1);
8226  if (!isa<ConstantSDNode>(Lane))
8227  return SDValue();
8228 
8229  SDValue Vec = Op.getOperand(0);
8230  EVT VT = Vec.getValueType();
8231 
8232  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
8233  return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
8234 
8235  if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
8236  SDLoc dl(Op);
8237  return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
8238  }
8239 
8240  return Op;
8241 }
8242 
8243 static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
8244  const ARMSubtarget *ST) {
8245  SDValue V1 = Op.getOperand(0);
8246  SDValue V2 = Op.getOperand(1);
8247  SDLoc dl(Op);
8248  EVT VT = Op.getValueType();
8249  EVT Op1VT = V1.getValueType();
8250  EVT Op2VT = V2.getValueType();
8251  unsigned NumElts = VT.getVectorNumElements();
8252 
8253  assert(Op1VT == Op2VT && "Operand types don't match!");
8254  assert(VT.getScalarSizeInBits() == 1 &&
8255  "Unexpected custom CONCAT_VECTORS lowering");
8256  assert(ST->hasMVEIntegerOps() &&
8257  "CONCAT_VECTORS lowering only supported for MVE");
8258 
8259  SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
8260  SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
8261 
8262  // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
8263  // promoted to v8i16, etc.
8264 
8265  MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
8266 
8267  // Extract the vector elements from Op1 and Op2 one by one and truncate them
8268  // to be the right size for the destination. For example, if Op1 is v4i1 then
8269  // the promoted vector is v4i32. The result of concatenation gives a v8i1,
8270  // which when promoted is v8i16. That means each i32 element from Op1 needs
8271  // truncating to i16 and inserting in the result.
8272  EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
8273  SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
8274  auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
8275  EVT NewVT = NewV.getValueType();
8276  EVT ConcatVT = ConVec.getValueType();
8277  for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
8278  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
8279  DAG.getIntPtrConstant(i, dl));
8280  ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
8281  DAG.getConstant(j, dl, MVT::i32));
8282  }
8283  return ConVec;
8284  };
8285  unsigned j = 0;
8286  ConVec = ExtractInto(NewV1, ConVec, j);
8287  ConVec = ExtractInto(NewV2, ConVec, j);
8288 
8289  // Now return the result of comparing the subvector with zero,
8290  // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
8291  return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
8292  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8293 }
8294 
8295 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
8296  const ARMSubtarget *ST) {
8297  EVT VT = Op->getValueType(0);
8298  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
8299  return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
8300 
8301  // The only time a CONCAT_VECTORS operation can have legal types is when
8302  // two 64-bit vectors are concatenated to a 128-bit vector.
8303  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
8304  "unexpected CONCAT_VECTORS");
8305  SDLoc dl(Op);
8306  SDValue Val = DAG.getUNDEF(MVT::v2f64);
8307  SDValue Op0 = Op.getOperand(0);
8308  SDValue Op1 = Op.getOperand(1);
8309  if (!Op0.isUndef())
8310  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
8311  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
8312  DAG.getIntPtrConstant(0, dl));
8313  if (!Op1.isUndef())
8314  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
8315  DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
8316  DAG.getIntPtrConstant(1, dl));
8317  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
8318 }
8319 
8320 static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
8321  const ARMSubtarget *ST) {
8322  SDValue V1 = Op.getOperand(0);
8323  SDValue V2 = Op.getOperand(1);
8324  SDLoc dl(Op);
8325  EVT VT = Op.getValueType();
8326  EVT Op1VT = V1.getValueType();
8327  unsigned NumElts = VT.getVectorNumElements();
8328  unsigned Index = cast<ConstantSDNode>(V2)->getZExtValue();
8329 
8330  assert(VT.getScalarSizeInBits() == 1 &&
8331  "Unexpected custom EXTRACT_SUBVECTOR lowering");
8332  assert(ST->hasMVEIntegerOps() &&
8333  "EXTRACT_SUBVECTOR lowering only supported for MVE");
8334 
8335  SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
8336 
8337  // We now have Op1 promoted to a vector of integers, where v8i1 gets
8338  // promoted to v8i16, etc.
8339 
8340  MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
8341 
8342  EVT SubVT = MVT::getVectorVT(ElType, NumElts);
8343  SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
8344  for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
8345  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
8346  DAG.getIntPtrConstant(i, dl));
8347  SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
8348  DAG.getConstant(j, dl, MVT::i32));
8349  }
8350 
8351  // Now return the result of comparing the subvector with zero,
8352  // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
8353  return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
8354  DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8355 }
8356 
8357 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
8358 /// element has been zero/sign-extended, depending on the isSigned parameter,
8359 /// from an integer type half its size.
8360 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
8361  bool isSigned) {
8362  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
8363  EVT VT = N->getValueType(0);
8364  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
8365  SDNode *BVN = N->getOperand(0).getNode();
8366  if (BVN->getValueType(0) != MVT::v4i32 ||
8367  BVN->getOpcode() != ISD::BUILD_VECTOR)
8368  return false;
8369  unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
8370  unsigned HiElt = 1 - LoElt;
8371  ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
8372  ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
8373  ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
8374  ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
8375  if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
8376  return false;
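 // Each v2i64 element is an (Hi, Lo) pair of i32 halves: the element is
 // sign-extended from i32 iff Hi equals the sign-extension of Lo, and
 // zero-extended iff Hi is zero, which is what the checks below test.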
8377  if (isSigned) {
8378  if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
8379  Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
8380  return true;
8381  } else {
8382  if (Hi0->isNullValue() && Hi1->isNullValue())
8383  return true;
8384  }
8385  return false;
8386  }
8387 
8388  if (N->getOpcode() != ISD::BUILD_VECTOR)
8389  return false;
8390 
8391  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
8392  SDNode *Elt = N->getOperand(i).getNode();
8393  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
8394  unsigned EltSize = VT.getScalarSizeInBits();
8395  unsigned HalfSize = EltSize / 2;
8396  if (isSigned) {
8397  if (!isIntN(HalfSize, C->getSExtValue()))
8398  return false;
8399  } else {
8400  if (!isUIntN(HalfSize, C->getZExtValue()))
8401  return false;
8402  }
8403  continue;
8404  }
8405  return false;
8406  }
8407 
8408  return true;
8409 }
8410 
8411 /// isSignExtended - Check if a node is a vector value that is sign-extended
8412 /// or a constant BUILD_VECTOR with sign-extended elements.
8413 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
8414  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
8415  return true;
8416  if (isExtendedBUILD_VECTOR(N, DAG, true))
8417  return true;
8418  return false;
8419 }
8420 
8421 /// isZeroExtended - Check if a node is a vector value that is zero-extended
8422 /// or a constant BUILD_VECTOR with zero-extended elements.
8423 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
8424  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
8425  return true;
8426  if (isExtendedBUILD_VECTOR(N, DAG, false))
8427  return true;
8428  return false;
8429 }
8430 
8431 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
8432  if (OrigVT.getSizeInBits() >= 64)
8433  return OrigVT;
8434 
8435  assert(OrigVT.isSimple() && "Expecting a simple value type");
8436 
8437  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
8438  switch (OrigSimpleTy) {
8439  default: llvm_unreachable("Unexpected Vector Type");
8440  case MVT::v2i8:
8441  case MVT::v2i16:
8442  return MVT::v2i32;
8443  case MVT::v4i8:
8444  return MVT::v4i16;
8445  }
8446 }
8447 
8448 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
8449 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
8450 /// We insert the required extension here to get the vector to fill a D register.
8451 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
8452  const EVT &OrigTy,
8453  const EVT &ExtTy,
8454  unsigned ExtOpcode) {
8455  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
8456  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
8457  // 64-bits we need to insert a new extension so that it will be 64-bits.
8458  assert(ExtTy.is128BitVector() && "Unexpected extension size");
8459  if (OrigTy.getSizeInBits() >= 64)
8460  return N;
8461 
8462  // Must extend size to at least 64 bits to be used as an operand for VMULL.
8463  EVT NewVT = getExtensionTo64Bits(OrigTy);
8464 
8465  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
8466 }
8467 
8468 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
8469 /// does not do any sign/zero extension. If the original vector is less
8470 /// than 64 bits, an appropriate extension will be added after the load to
8471 /// reach a total size of 64 bits. We have to add the extension separately
8472 /// because ARM does not have a sign/zero extending load for vectors.
8473 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
8474  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
8475 
8476  // The load already has the right type.
8477  if (ExtendedTy == LD->getMemoryVT())
8478  return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
8479  LD->getBasePtr(), LD->getPointerInfo(),
8480  LD->getAlignment(), LD->getMemOperand()->getFlags());
8481 
8482  // We need to create a zextload/sextload. We cannot just create a load
8483  // followed by a zext/zext node because LowerMUL is also run during normal
8484  // operation legalization where we can't create illegal types.
8485  return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
8486  LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
8487  LD->getMemoryVT(), LD->getAlignment(),
8488  LD->getMemOperand()->getFlags());
8489 }
8490 
8491 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
8492 /// extending load, or BUILD_VECTOR with extended elements, return the
8493 /// unextended value. The unextended vector should be 64 bits so that it can
8494 /// be used as an operand to a VMULL instruction. If the original vector size
8495 /// before extension is less than 64 bits we add an extension to resize
8496 /// the vector to 64 bits.
8497 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
8498  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
8499  return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
8500  N->getOperand(0)->getValueType(0),
8501  N->getValueType(0),
8502  N->getOpcode());
8503 
8504  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
8505  assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
8506  "Expected extending load");
8507 
8508  SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
8509  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
8510  unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8511  SDValue extLoad =
8512  DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
8513  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
8514 
8515  return newLoad;
8516  }
8517 
8518  // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
8519  // have been legalized as a BITCAST from v4i32.
8520  if (N->getOpcode() == ISD::BITCAST) {
8521  SDNode *BVN = N->getOperand(0).getNode();
8522  assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
8523  BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
8524  unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
8525  return DAG.getBuildVector(
8526  MVT::v2i32, SDLoc(N),
8527  {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
8528  }
8529  // Construct a new BUILD_VECTOR with elements truncated to half the size.
8530  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
8531  EVT VT = N->getValueType(0);
8532  unsigned EltSize = VT.getScalarSizeInBits() / 2;
8533  unsigned NumElts = VT.getVectorNumElements();
8534  MVT TruncVT = MVT::getIntegerVT(EltSize);
8535  SmallVector<SDValue, 8> Ops;
8536  SDLoc dl(N);
8537  for (unsigned i = 0; i != NumElts; ++i) {
8538  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
8539  const APInt &CInt = C->getAPIntValue();
8540  // Element types smaller than 32 bits are not legal, so use i32 elements.
8541  // The values are implicitly truncated so sext vs. zext doesn't matter.
8542  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
8543  }
8544  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
8545 }
8546 
8547 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
8548  unsigned Opcode = N->getOpcode();
8549  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
8550  SDNode *N0 = N->getOperand(0).getNode();
8551  SDNode *N1 = N->getOperand(1).getNode();
8552  return N0->hasOneUse() && N1->hasOneUse() &&
8553  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
8554  }
8555  return false;
8556 }
8557 
8558 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
8559  unsigned Opcode = N->getOpcode();
8560  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
8561  SDNode *N0 = N->getOperand(0).getNode();
8562  SDNode *N1 = N->getOperand(1).getNode();
8563  return N0->hasOneUse() && N1->hasOneUse() &&
8564  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
8565  }
8566  return false;
8567 }
8568 
8569 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
8570  // Multiplications are only custom-lowered for 128-bit vectors so that
8571  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
8572  EVT VT = Op.getValueType();
8573  assert(VT.is128BitVector() && VT.isInteger() &&
8574  "unexpected type for custom-lowering ISD::MUL");
8575  SDNode *N0 = Op.getOperand(0).getNode();
8576  SDNode *N1 = Op.getOperand(1).getNode();
8577  unsigned NewOpc = 0;
8578  bool isMLA = false;
8579  bool isN0SExt = isSignExtended(N0, DAG);
8580  bool isN1SExt = isSignExtended(N1, DAG);
8581  if (isN0SExt && isN1SExt)
8582  NewOpc = ARMISD::VMULLs;
8583  else {
8584  bool isN0ZExt = isZeroExtended(N0, DAG);
8585  bool isN1ZExt = isZeroExtended(N1, DAG);
8586  if (isN0ZExt && isN1ZExt)
8587  NewOpc = ARMISD::VMULLu;
8588  else if (isN1SExt || isN1ZExt) {
8589  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
8590  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
8591  if (isN1SExt && isAddSubSExt(N0, DAG)) {
8592  NewOpc = ARMISD::VMULLs;
8593  isMLA = true;
8594  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
8595  NewOpc = ARMISD::VMULLu;
8596  isMLA = true;
8597  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
8598  std::swap(N0, N1);
8599  NewOpc = ARMISD::VMULLu;
8600  isMLA = true;
8601  }
8602  }
8603 
8604  if (!NewOpc) {
8605  if (VT == MVT::v2i64)
8606  // Fall through to expand this. It is not legal.
8607  return SDValue();
8608  else
8609  // Other vector multiplications are legal.
8610  return Op;
8611  }
8612  }
8613 
8614  // Legalize to a VMULL instruction.
8615  SDLoc DL(Op);
8616  SDValue Op0;
8617  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
8618  if (!isMLA) {
8619  Op0 = SkipExtensionForVMULL(N0, DAG);
8620  assert(Op0.getValueType().is64BitVector() &&
8621  Op1.getValueType().is64BitVector() &&
8622  "unexpected types for extended operands to VMULL");
8623  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
8624  }
8625 
8626  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
8627  // isel lowering to take advantage of no-stall back to back vmul + vmla.
8628  // vmull q0, d4, d6
8629  // vmlal q0, d5, d6
8630  // is faster than
8631  // vaddl q0, d4, d5
8632  // vmovl q1, d6
8633  // vmul q0, q0, q1
8634  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
8635  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
8636  EVT Op1VT = Op1.getValueType();
8637  return DAG.getNode(N0->getOpcode(), DL, VT,
8638  DAG.getNode(NewOpc, DL, VT,
8639  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
8640  DAG.getNode(NewOpc, DL, VT,
8641  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
8642 }
8643 
8644 static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
8645  SelectionDAG &DAG) {
8646  // TODO: Should this propagate fast-math-flags?
8647 
8648  // Convert to float
8649  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
8650  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
8651  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
8652  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
8653  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
8654  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
8655  // Get reciprocal estimate.
8656  // float4 recip = vrecpeq_f32(yf);
8657  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
8658  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
8659  Y);
8660  // Because char has a smaller range than uchar, we can actually get away
8661  // without any newton steps. This requires that we use a weird bias
8662  // of 0xb000, however (again, this has been exhaustively tested).
8663  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
8664  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
8665  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
8666  Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
8667  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
8668  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
8669  // Convert back to short.
8670  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
8671  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
8672  return X;
8673 }
8674 
8675 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
8676  SelectionDAG &DAG) {
8677  // TODO: Should this propagate fast-math-flags?
8678 
8679  SDValue N2;
8680  // Convert to float.
8681  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
8682  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
8683  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
8684  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
8685  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
8686  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
8687 
8688  // Use reciprocal estimate and one refinement step.
8689  // float4 recip = vrecpeq_f32(yf);
8690  // recip *= vrecpsq_f32(yf, recip);
8691  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
8692  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
8693  N1);
8694  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
8695  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
8696  N1, N2);
8697  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
8698  // Because short has a smaller range than ushort, we can actually get away
8699  // with only a single newton step. This requires that we use a weird bias
8700  // of 89, however (again, this has been exhaustively tested).
8701  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
8702  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
8703  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
8704  N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
8705  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
8706  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
8707  // Convert back to integer and return.
8708  // return vmovn_s32(vcvt_s32_f32(result));
8709  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
8710  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
8711  return N0;
8712 }
8713 
8714 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
8715  const ARMSubtarget *ST) {
8716  EVT VT = Op.getValueType();
8717  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
8718  "unexpected type for custom-lowering ISD::SDIV");
8719 
8720  SDLoc dl(Op);
8721  SDValue N0 = Op.getOperand(0);
8722  SDValue N1 = Op.getOperand(1);
8723  SDValue N2, N3;
8724 
8725  if (VT == MVT::v8i8) {
8726  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
8727  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
8728 
8729  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
8730  DAG.getIntPtrConstant(4, dl));
8731  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
8732  DAG.getIntPtrConstant(4, dl));
8733  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
8734  DAG.getIntPtrConstant(0, dl));
8735  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
8736  DAG.getIntPtrConstant(0, dl));
8737 
8738  N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
8739  N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
8740 
8741  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
8742  N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
8743 
8744  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
8745  return N0;
8746  }
8747  return LowerSDIV_v4i16(N0, N1, dl, DAG);
8748 }
8749 
8750 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
8751  const ARMSubtarget *ST) {
8752  // TODO: Should this propagate fast-math-flags?
8753  EVT VT = Op.getValueType();
8754  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
8755  "unexpected type for custom-lowering ISD::UDIV");
8756 
8757  SDLoc dl(Op);
8758  SDValue N0 = Op.getOperand(0);
8759  SDValue N1 = Op.getOperand(1);
8760  SDValue N2, N3;
8761 
8762  if (VT == MVT::v8i8) {
8763  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
8764  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
8765 
8766  N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
8767  DAG.getIntPtrConstant(4, dl));
8768  N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
8769  DAG.getIntPtrConstant(4, dl));
8770  N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
8771  DAG.getIntPtrConstant(0, dl));
8772  N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
8773  DAG.getIntPtrConstant(0, dl));
8774 
8775  N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
8776  N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
8777 
8778  N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
8779  N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
8780 
8781  N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
8782  DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
8783  MVT::i32),
8784  N0);
8785  return N0;
8786  }
8787 
8788  // v4i16 sdiv ... Convert to float.
8789  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
8790  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
8791  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
8792  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
8793  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
8794  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
8795 
8796  // Use reciprocal estimate and two refinement steps.
8797  // float4 recip = vrecpeq_f32(yf);
8798  // recip *= vrecpsq_f32(yf, recip);
8799  // recip *= vrecpsq_f32(yf, recip);
8800  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
8801  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
8802  BN1);
8803  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
8804  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
8805  BN1, N2);
8806  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
8807  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
8808  DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
8809  BN1, N2);
8810  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
8811  // Simply multiplying by the reciprocal estimate can leave us a few ulps
8812  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
8813  // and that it will never cause us to return an answer too large).
8814  // float4 result = as_float4(as_int4(xf*recip) + 2);
8815  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
8816  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
8817  N1 = DAG.getConstant(2, dl, MVT::v4i32);
8818  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
8819  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
8820  // Convert back to integer and return.
8821  // return vmovn_u32(vcvt_s32_f32(result));
8822  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
8823  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
8824  return N0;
8825 }
8826 
8827 static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
8828  SDNode *N = Op.getNode();
8829  EVT VT = N->getValueType(0);
8830  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
8831 
8832  SDValue Carry = Op.getOperand(2);
8833 
8834  SDLoc DL(Op);
8835 
8836  SDValue Result;
8837  if (Op.getOpcode() == ISD::ADDCARRY) {
8838  // This converts the boolean value carry into the carry flag.
8839  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
8840 
8841  // Do the addition proper using the carry flag we wanted.
8842  Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
8843  Op.getOperand(1), Carry);
8844 
8845  // Now convert the carry flag into a boolean value.
8846  Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
8847  } else {
8848  // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
8849  // have to invert the carry first.
8850  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
8851  DAG.getConstant(1, DL, MVT::i32), Carry);
8852  // This converts the boolean value carry into the carry flag.
8853  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
8854 
8855  // Do the subtraction proper using the carry flag we wanted.
8856  Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
8857  Op.getOperand(1), Carry);
8858 
8859  // Now convert the carry flag into a boolean value.
8860  Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
8861  // But the carry returned by ARMISD::SUBE is not a borrow as expected
8862  // by ISD::SUBCARRY, so compute 1 - C.
8863  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
8864  DAG.getConstant(1, DL, MVT::i32), Carry);
8865  }
8866 
8867  // Return both values.
8868  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
8869 }
8870 
8871 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
8872  assert(Subtarget->isTargetDarwin());
8873 
8874  // For iOS, we want to call an alternative entry point: __sincos_stret;
8875  // the return values are passed via sret.
8876  SDLoc dl(Op);
8877  SDValue Arg = Op.getOperand(0);
8878  EVT ArgVT = Arg.getValueType();
8879  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
8880  auto PtrVT = getPointerTy(DAG.getDataLayout());
8881 
8883  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8884 
8885  // Pair of floats / doubles used to pass the result.
8886  Type *RetTy = StructType::get(ArgTy, ArgTy);
8887  auto &DL = DAG.getDataLayout();
8888 
8889  ArgListTy Args;
8890  bool ShouldUseSRet = Subtarget->isAPCS_ABI();
8891  SDValue SRet;
8892  if (ShouldUseSRet) {
8893  // Create stack object for sret.
8894  const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
8895  const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
8896  int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
8897  SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
8898 
8899  ArgListEntry Entry;
8900  Entry.Node = SRet;
8901  Entry.Ty = RetTy->getPointerTo();
8902  Entry.IsSExt = false;
8903  Entry.IsZExt = false;
8904  Entry.IsSRet = true;
8905  Args.push_back(Entry);
8906  RetTy = Type::getVoidTy(*DAG.getContext());
8907  }
8908 
8909  ArgListEntry Entry;
8910  Entry.Node = Arg;
8911  Entry.Ty = ArgTy;
8912  Entry.IsSExt = false;
8913  Entry.IsZExt = false;
8914  Args.push_back(Entry);
8915 
8916  RTLIB::Libcall LC =
8917  (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
8918  const char *LibcallName = getLibcallName(LC);
8919  CallingConv::ID CC = getLibcallCallingConv(LC);
8920  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
8921 
8922  TargetLowering::CallLoweringInfo CLI(DAG);
8923  CLI.setDebugLoc(dl)
8924  .setChain(DAG.getEntryNode())
8925  .setCallee(CC, RetTy, Callee, std::move(Args))
8926  .setDiscardResult(ShouldUseSRet);
8927  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
8928 
8929  if (!ShouldUseSRet)
8930  return CallResult.first;
8931 
8932  SDValue LoadSin =
8933  DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
8934 
8935  // Address of cos field.
8936  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
8937  DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
8938  SDValue LoadCos =
8939  DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
8940 
8941  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
8942  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
8943  LoadSin.getValue(0), LoadCos.getValue(0));
8944 }
8945 
8946 SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
8947  bool Signed,
8948  SDValue &Chain) const {
8949  EVT VT = Op.getValueType();
8950  assert((VT == MVT::i32 || VT == MVT::i64) &&
8951  "unexpected type for custom lowering DIV");
8952  SDLoc dl(Op);
8953 
8954  const auto &DL = DAG.getDataLayout();
8955  const auto &TLI = DAG.getTargetLoweringInfo();
8956 
8957  const char *Name = nullptr;
8958  if (Signed)
8959  Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
8960  else
8961  Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
8962 
8963  SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
8964 
8964 
8965  ARMTargetLowering::ArgListTy Args;
8966 
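 // Note the {1, 0} order below: the divisor (operand 1) is pushed before the
 // dividend (operand 0), matching the argument order the __rt_* helpers
 // expect.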
8967  for (auto AI : {1, 0}) {
8968  ArgListEntry Arg;
8969  Arg.Node = Op.getOperand(AI);
8970  Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
8971  Args.push_back(Arg);
8972  }
8973 
8974  CallLoweringInfo CLI(DAG);
8975  CLI.setDebugLoc(dl)
8976  .setChain(Chain)
8977  .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
8978  ES, std::move(Args));
8979 
8980  return LowerCallTo(CLI).first;
8981 }
8982 
8983 // This is a code size optimisation: return the original SDIV node to
8984 // DAGCombiner when we don't want to expand SDIV into a sequence of
8985  // instructions, and an empty node otherwise, which will cause the
8986 // SDIV to be expanded in DAGCombine.
8987 SDValue
8988 ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
8989  SelectionDAG &DAG,
8990  SmallVectorImpl<SDNode *> &Created) const {
8991  // TODO: Support SREM
8992  if (N->getOpcode() != ISD::SDIV)
8993  return SDValue();
8994 
8995  const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
8996  const bool MinSize = ST.hasMinSize();
8997  const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
8998  : ST.hasDivideInARMMode();
8999 
9000  // Don't touch vector types; rewriting this may lead to scalarizing
9001  // the int divs.
9002  if (N->getOperand(0).getValueType().isVector())
9003  return SDValue();
9004 
9005  // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
9006  // hwdiv support for this to be really profitable.
9007  if (!(MinSize && HasDivide))
9008  return SDValue();
9009 
9010  // ARM mode is a bit simpler than Thumb: we can handle large power
9011  // of 2 immediates with 1 mov instruction; no further checks required,
9012  // just return the sdiv node.
9013  if (!ST.isThumb())
9014  return SDValue(N, 0);
9015 
9016  // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
9017  // and thus lose the code size benefits of a MOVS that requires only 2.
9018  // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
9019  // but as it's doing exactly this, it's not worth the trouble to get TTI.
9020  if (Divisor.sgt(128))
9021  return SDValue();
9022 
9023  return SDValue(N, 0);
9024 }
9025 
9026 SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
9027  bool Signed) const {
9028  assert(Op.getValueType() == MVT::i32 &&
9029  "unexpected type for custom lowering DIV");
9030  SDLoc dl(Op);
9031 
9032  SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
9033  DAG.getEntryNode(), Op.getOperand(1));
9034 
9035  return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9036 }
9037 
9038 static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
9039  SDLoc DL(N);
9040  SDValue Op = N->getOperand(1);
9041  if (N->getValueType(0) == MVT::i32)
9042  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
9043  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
9044  DAG.getConstant(0, DL, MVT::i32));
9045  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
9046  DAG.getConstant(1, DL, MVT::i32));
9047  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
9048  DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
9049 }
9050 
9051 void ARMTargetLowering::ExpandDIV_Windows(
9052  SDValue Op, SelectionDAG &DAG, bool Signed,
9053  SmallVectorImpl<SDValue> &Results) const {
9054  const auto &DL = DAG.getDataLayout();
9055  const auto &TLI = DAG.getTargetLoweringInfo();
9056 
9057  assert(Op.getValueType() == MVT::i64 &&
9058  "unexpected type for custom lowering DIV");
9059  SDLoc dl(Op);
9060 
9061  SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
9062 
9063  SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9064 
9065  SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
9066  SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
9067  DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
9068  Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
9069 
9070  Results.push_back(Lower);
9071  Results.push_back(Upper);
9072 }
9073 
9074 static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
9075  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
9076  EVT MemVT = LD->getMemoryVT();
9077  assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
9078  "Expected a predicate type!");
9079  assert(MemVT == Op.getValueType());
9080  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
9081  "Expected a non-extending load");
9082  assert(LD->isUnindexed() && "Expected an unindexed load");
9083 
9084  // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16-bit
9085  // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
9086  // need to make sure that 8/4 bits are actually loaded into the correct
9087  // place, which means loading the value and then shuffling the values into
9088  // the bottom bits of the predicate.
9089  // Equally, VLDR for a v16i1 will actually load 32 bits (so will be incorrect
9090  // for BE).
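 // For example, a v4i1 load is performed below as an i32 any-extending load
 // of an i4 memory type, then a PREDICATE_CAST of the result to v16i1, and
 // finally an EXTRACT_SUBVECTOR of the low v4i1.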
9091 
9092  SDLoc dl(Op);
9093  SDValue Load = DAG.getExtLoad(
9094  ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
9095  EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
9096  LD->getMemOperand());
9097  SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Load);
9098  if (MemVT != MVT::v16i1)
9099  Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
9100  DAG.getConstant(0, dl, MVT::i32));
9101  return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
9102 }
9103 
9104 static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
9105  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
9106  EVT MemVT = ST->getMemoryVT();
9107  assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
9108  "Expected a predicate type!");
9109  assert(MemVT == ST->getValue().getValueType());
9110  assert(!ST->isTruncatingStore() && "Expected a non-extending store");
9111  assert(ST->isUnindexed() && "Expected an unindexed store");
9112 
9113  // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits
9114  // unset and a scalar store.
9115  SDLoc dl(Op);
9116  SDValue Build = ST->getValue();
9117  if (MemVT != MVT::v16i1) {
9118  SmallVector<SDValue, 16> Ops;
9119  for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++)
9120  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
9121  DAG.getConstant(I, dl, MVT::i32)));
9122  for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
9123  Ops.push_back(DAG.getUNDEF(MVT::i32));
9124  Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
9125  }
9126  SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
9127  return DAG.getTruncStore(
9128  ST->getChain(), dl, GRP, ST->getBasePtr(),
9129  EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
9130  ST->getMemOperand());
9131 }
9132 
9133 static bool isZeroVector(SDValue N) {
9134  return (ISD::isBuildVectorAllZeros(N.getNode()) ||
9135  (N->getOpcode() == ARMISD::VMOVIMM &&
9136  isNullConstant(N->getOperand(0))));
9137 }
9138 
9139 static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
9140  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
9141  MVT VT = Op.getSimpleValueType();
9142  SDValue Mask = N->getMask();
9143  SDValue PassThru = N->getPassThru();
9144  SDLoc dl(Op);
9145 
9146  if (isZeroVector(PassThru))
9147  return Op;
9148 
9149  // MVE Masked loads use zero as the passthru value. Here we convert undef to
9150  // zero too, and other values are lowered to a select.
9151  SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
9152  DAG.getTargetConstant(0, dl, MVT::i32));
9153  SDValue NewLoad = DAG.getMaskedLoad(
9154  VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
9155  N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
9156  N->getExtensionType(), N->isExpandingLoad());
9157  SDValue Combo = NewLoad;
9158  if (!PassThru.isUndef() &&
9159  (PassThru.getOpcode() != ISD::BITCAST ||
9160  !isZeroVector(PassThru->getOperand(0))))
9161  Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
9162  return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
9163 }
9164 
9165 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
9166  if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
9167  // Acquire/Release load/store is not legal for targets without a dmb or
9168  // equivalent available.
9169  return SDValue();
9170 
9171  // Monotonic load/store is legal for all targets.
9172  return Op;
9173 }
9174 
9175 static void ReplaceREADCYCLECOUNTER(SDNode *N,
9176  SmallVectorImpl<SDValue> &Results,
9177  SelectionDAG &DAG,
9178  const ARMSubtarget *Subtarget) {
9179  SDLoc DL(N);
9180  // Under Power Management extensions, the cycle-count is:
9181  // mrc p15, #0, <Rt>, c9, c13, #0
9182  SDValue Ops[] = { N->getOperand(0), // Chain
9183  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
9184  DAG.getTargetConstant(15, DL, MVT::i32),
9185  DAG.getTargetConstant(0, DL, MVT::i32),
9186  DAG.getTargetConstant(9, DL, MVT::i32),
9187  DAG.getTargetConstant(13, DL, MVT::i32),
9188  DAG.getTargetConstant(0, DL, MVT::i32)
9189  };
9190 
9191  SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
9192  DAG.getVTList(MVT::i32, MVT::Other), Ops);
9193  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
9194  DAG.getConstant(0, DL, MVT::i32)));
9195  Results.push_back(Cycles32.getValue(1));
9196 }
9197 
9198 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
9199  SDLoc dl(V.getNode());
9200  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
9201  SDValue VHi = DAG.getAnyExtOrTrunc(
9202  DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
9203  dl, MVT::i32);
9204  bool isBigEndian = DAG.getDataLayout().isBigEndian();
9205  if (isBigEndian)
9206  std::swap (VLo, VHi);
9207  SDValue RegClass =
9208  DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
9209  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
9210  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
9211  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
9212  return SDValue(
9213  DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
9214 }
9215 
9216 static void ReplaceCMP_SWAP_64Results(SDNode *N,
9217  SmallVectorImpl<SDValue> & Results,
9218  SelectionDAG &DAG) {
9219  assert(N->getValueType(0) == MVT::i64 &&
9220  "AtomicCmpSwap on types less than 64 should be legal");
9221  SDValue Ops[] = {N->getOperand(1),
9222  createGPRPairNode(DAG, N->getOperand(2)),
9223  createGPRPairNode(DAG, N->getOperand(3)),
9224  N->getOperand(0)};
9225  SDNode *CmpSwap = DAG.getMachineNode(
9226  ARM::CMP_SWAP_64, SDLoc(N),
9227  DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
9228 
9229  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
9230  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
9231 
9232  bool isBigEndian = DAG.getDataLayout().isBigEndian();
9233 
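 // Extract both 32-bit halves of the GPRPair produced by CMP_SWAP_64;
 // gsub_0 holds the low word on little-endian targets and the high word on
 // big-endian ones, hence the swap below.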
9234  Results.push_back(
9235  DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
9236  SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
9237  Results.push_back(
9238  DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
9239  SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
9240  Results.push_back(SDValue(CmpSwap, 2));
9241 }
9242 
9243 SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
9244  SDLoc dl(Op);
9245  EVT VT = Op.getValueType();
9246  SDValue Chain = Op.getOperand(0);
9247  SDValue LHS = Op.getOperand(1);
9248  SDValue RHS = Op.getOperand(2);
9249  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
9250  bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
9251 
9252  // If we don't have instructions of this float type then soften to a libcall
9253  // and use SETCC instead.
9254  if (isUnsupportedFloatingType(LHS.getValueType())) {
9255  DAG.getTargetLoweringInfo().softenSetCCOperands(
9256  DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
9257  if (!RHS.getNode()) {
9258  RHS = DAG.getConstant(0, dl, LHS.getValueType());
9259  CC = ISD::SETNE;
9260  }
9261  SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
9262  DAG.getCondCode(CC));
9263  return DAG.getMergeValues({Result, Chain}, dl);
9264  }
9265 
9266  ARMCC::CondCodes CondCode, CondCode2;
9267  FPCCToARMCC(CC, CondCode, CondCode2);
9268 
9269  // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
9270  // in CMPFP and CMPFPE, but instead it should be made explicit by these
9271  // instructions using a chain instead of glue. This would also fix the problem
9272  // here (and also in LowerSELECT_CC) where we generate two comparisons when
9273  // CondCode2 != AL.
9274  SDValue True = DAG.getConstant(1, dl, VT);
9275  SDValue False = DAG.getConstant(0, dl, VT);
9276  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
9277  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
9278  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
9279  SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
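 // Conditions such as SETUEQ map to two ARM condition codes, so a second
 // compare and CMOV are chained below to OR in the extra condition.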
9280  if (CondCode2 != ARMCC::AL) {
9281  ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
9282  Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
9283  Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
9284  }
9285  return DAG.getMergeValues({Result, Chain}, dl);
9286 }
9287 
9288 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9289  LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
9290  switch (Op.getOpcode()) {
9291  default: llvm_unreachable("Don't know how to custom lower this!");
9292  case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
9293  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
9294  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
9295  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
9296  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
9297  case ISD::SELECT: return LowerSELECT(Op, DAG);
9298  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
9299  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
9300  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
9301  case ISD::BR_JT: return LowerBR_JT(Op, DAG);
9302  case ISD::VASTART: return LowerVASTART(Op, DAG);
9303  case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
9304  case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
9305  case ISD::SINT_TO_FP:
9306  case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
9307  case ISD::STRICT_FP_TO_SINT:
9308  case ISD::STRICT_FP_TO_UINT:
9309  case ISD::FP_TO_SINT:
9310  case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
9311  case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
9312  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
9313  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
9314  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
9315  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
9316  case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
9317  case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
9318  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
9319  Subtarget);
9320  case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
9321  case ISD::SHL:
9322  case ISD::SRL:
9323  case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
9324  case ISD::SREM: return LowerREM(Op.getNode(), DAG);
9325  case ISD::UREM: return LowerREM(Op.getNode(), DAG);
9326  case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
9327  case ISD::SRL_PARTS:
9328  case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
9329  case ISD::CTTZ:
9330  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
9331  case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
9332  case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
9333  case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
9334  case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
9335  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
9336  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
9337  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
9338  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
9339  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
9340  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
9341  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
9342  case ISD::MUL: return LowerMUL(Op, DAG);
9343  case ISD::SDIV:
9344  if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
9345  return LowerDIV_Windows(Op, DAG, /* Signed */ true);
9346  return LowerSDIV(Op, DAG, Subtarget);
9347  case ISD::UDIV:
9348  if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
9349  return LowerDIV_Windows(Op, DAG, /* Signed */ false);
9350  return LowerUDIV(Op, DAG, Subtarget);
9351  case ISD::ADDCARRY:
9352  case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
9353  case ISD::SADDO:
9354  case ISD::SSUBO:
9355  return LowerSignedALUO(Op, DAG);
9356  case ISD::UADDO:
9357  case ISD::USUBO:
9358  return LowerUnsignedALUO(Op, DAG);
9359  case ISD::SADDSAT:
9360  case ISD::SSUBSAT:
9361  return LowerSADDSUBSAT(Op, DAG, Subtarget);
9362  case ISD::LOAD:
9363  return LowerPredicateLoad(Op, DAG);
9364  case ISD::STORE:
9365  return LowerPredicateStore(Op, DAG);
9366  case ISD::MLOAD:
9367  return LowerMLOAD(Op, DAG);
9368  case ISD::ATOMIC_LOAD:
9369  case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
9370  case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
9371  case ISD::SDIVREM:
9372  case ISD::UDIVREM: return LowerDivRem(Op, DAG);
9373  case ISD::DYNAMIC_STACKALLOC:
9374  if (Subtarget->isTargetWindows())
9375  return LowerDYNAMIC_STACKALLOC(Op, DAG);
9376  llvm_unreachable("Don't know how to custom lower this!");
9377  case ISD::STRICT_FP_ROUND:
9378  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
9379  case ISD::STRICT_FP_EXTEND:
9380  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
9381  case ISD::STRICT_FSETCC:
9382  case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
9383  case ARMISD::WIN__DBZCHK: return SDValue();
9384  }
9385 }
9386 
9387 static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
9388  SelectionDAG &DAG) {
9389  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9390  unsigned Opc = 0;
9391  if (IntNo == Intrinsic::arm_smlald)
9392  Opc = ARMISD::SMLALD;
9393  else if (IntNo == Intrinsic::arm_smlaldx)
9394  Opc = ARMISD::SMLALDX;
9395  else if (IntNo == Intrinsic::arm_smlsld)
9396  Opc = ARMISD::SMLSLD;
9397  else if (IntNo == Intrinsic::arm_smlsldx)
9398  Opc = ARMISD::SMLSLDX;
9399  else
9400  return;
9401 
9402  SDLoc dl(N);
9403  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
9404  N->getOperand(3),
9405  DAG.getConstant(0, dl, MVT::i32));
9406  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
9407  N->getOperand(3),
9408  DAG.getConstant(1, dl, MVT::i32));
9409 
9410  SDValue LongMul = DAG.getNode(Opc, dl,
9411  DAG.getVTList(MVT::i32, MVT::i32),
9412  N->getOperand(1), N->getOperand(2),
9413  Lo, Hi);
9414  Results.push_back(LongMul.getValue(0));
9415  Results.push_back(LongMul.getValue(1));
9416 }
9417 
9418 /// ReplaceNodeResults - Replace the results of a node that has an illegal
9419 /// result type with new values built out of custom code.
9420 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
9421  SmallVectorImpl<SDValue> &Results,
9422  SelectionDAG &DAG) const {
9423  SDValue Res;
9424  switch (N->getOpcode()) {
9425  default:
9426  llvm_unreachable("Don't know how to custom expand this!");
9427  case ISD::READ_REGISTER:
9428  ExpandREAD_REGISTER(N, Results, DAG);
9429  break;
9430  case ISD::BITCAST:
9431  Res = ExpandBITCAST(N, DAG, Subtarget);
9432  break;
9433  case ISD::SRL:
9434  case ISD::SRA:
9435  case ISD::SHL:
9436  Res = Expand64BitShift(N, DAG, Subtarget);
9437  break;
9438  case ISD::SREM:
9439  case ISD::UREM:
9440  Res = LowerREM(N, DAG);
9441  break;
9442  case ISD::SDIVREM:
9443  case ISD::UDIVREM:
9444  Res = LowerDivRem(SDValue(N, 0), DAG);
9445  assert(Res.getNumOperands() == 2 && "DivRem needs two values");
9446  Results.push_back(Res.getValue(0));
9447  Results.push_back(Res.getValue(1));
9448  return;
9449  case ISD::SADDSAT:
9450  case ISD::SSUBSAT:
9451  Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
9452  break;
9453  case ISD::READCYCLECOUNTER:
9454  ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
9455  return;
9456  case ISD::UDIV:
9457  case ISD::SDIV:
9458  assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
9459  return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
9460  Results);
9461  case ISD::ATOMIC_CMP_SWAP:
9462  ReplaceCMP_SWAP_64Results(N, Results, DAG);
9463  return;
9464  case ISD::INTRINSIC_WO_CHAIN:
9465  return ReplaceLongIntrinsic(N, Results, DAG);
9466  case ISD::ABS:
9467  lowerABS(N, Results, DAG);
9468  return;
9469 
9470  }
9471  if (Res.getNode())
9472  Results.push_back(Res);
9473 }
9474 
9475 //===----------------------------------------------------------------------===//
9476 // ARM Scheduler Hooks
9477 //===----------------------------------------------------------------------===//
9478 
9479 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
9480 /// registers the function context.
9481 void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
9482  MachineBasicBlock *MBB,
9483  MachineBasicBlock *DispatchBB,
9484  int FI) const {
9485  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
9486  "ROPI/RWPI not currently supported with SjLj");
9487  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9488  DebugLoc dl = MI.getDebugLoc();
9489  MachineFunction *MF = MBB->getParent();
9490  MachineRegisterInfo *MRI = &MF->getRegInfo();
9491  MachineConstantPool *MCP = MF->getConstantPool();
9492  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
9493  const Function &F = MF->getFunction();
9494 
9495  bool isThumb = Subtarget->isThumb();
9496  bool isThumb2 = Subtarget->isThumb2();
9497 
9498  unsigned PCLabelId = AFI->createPICLabelUId();
9499  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
9500  ARMConstantPoolValue *CPV =
9501  ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
9502  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
9503 
9504  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
9505  : &ARM::GPRRegClass;
9506 
9507  // Grab constant pool and fixed stack memory operands.
9508  MachineMemOperand *CPMMO =
9509  MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
9510  MachineMemOperand::MOLoad, 4, 4);
9511 
9512  MachineMemOperand *FIMMOSt =
9513  MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
9514  MachineMemOperand::MOStore, 4, 4);
9515 
9516  // Load the address of the dispatch MBB into the jump buffer.
9517  if (isThumb2) {
9518  // Incoming value: jbuf
9519  // ldr.n r5, LCPI1_1
9520  // orr r5, r5, #1
9521  // add r5, pc
9522  // str r5, [$jbuf, #+4] ; &jbuf[1]
9523  Register NewVReg1 = MRI->createVirtualRegister(TRC);
9524  BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
9525  .addConstantPoolIndex(CPI)
9526  .addMemOperand(CPMMO)
9527  .add(predOps(ARMCC::AL));
9528  // Set the low bit because of thumb mode.
9529  Register NewVReg2 = MRI->createVirtualRegister(TRC);
9530  BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
9531  .addReg(NewVReg1, RegState::Kill)
9532  .addImm(0x01)
9533  .add(predOps(ARMCC::AL))
9534  .add(condCodeOp());
9535  Register NewVReg3 = MRI->createVirtualRegister(TRC);
9536  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
9537  .addReg(NewVReg2, RegState::Kill)
9538  .addImm(PCLabelId);
9539  BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
9540  .addReg(NewVReg3, RegState::Kill)
9541  .addFrameIndex(FI)
9542  .addImm(36) // &jbuf[1] :: pc
9543  .addMemOperand(FIMMOSt)
9544  .add(predOps(ARMCC::AL));
9545  } else if (isThumb) {
9546  // Incoming value: jbuf
9547  // ldr.n r1, LCPI1_4
9548  // add r1, pc
9549  // mov r2, #1
9550  // orrs r1, r2
9551  // add r2, $jbuf, #+4 ; &jbuf[1]
9552  // str r1, [r2]
9553  Register NewVReg1 = MRI->createVirtualRegister(TRC);
9554  BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
9555  .addConstantPoolIndex(CPI)
9556  .addMemOperand(CPMMO)
9557  .add(predOps(ARMCC::AL));
9558  Register NewVReg2 = MRI->createVirtualRegister(TRC);
9559  BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
9560  .addReg(NewVReg1, RegState::Kill)
9561  .addImm(PCLabelId);
9562  // Set the low bit because of thumb mode.
9563  Register NewVReg3 = MRI->createVirtualRegister(TRC);
9564  BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
9565  .addReg(ARM::CPSR, RegState::Define)
9566  .addImm(1)
9567  .add(predOps(ARMCC::AL));
9568  Register NewVReg4 = MRI->createVirtualRegister(TRC);
9569  BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
9570  .addReg(ARM::CPSR, RegState::Define)
9571  .addReg(NewVReg2, RegState::Kill)
9572  .addReg(NewVReg3, RegState::Kill)
9573  .add(predOps(ARMCC::AL));
9574  Register NewVReg5 = MRI->createVirtualRegister(TRC);
9575  BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
9576  .addFrameIndex(FI)
9577  .addImm(36); // &jbuf[1] :: pc
9578  BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
9579  .addReg(NewVReg4, RegState::Kill)
9580  .addReg(NewVReg5, RegState::Kill)
9581  .addImm(0)
9582  .addMemOperand(FIMMOSt)
9583  .add(predOps(ARMCC::AL));
9584  } else {
9585  // Incoming value: jbuf
9586  // ldr r1, LCPI1_1
9587  // add r1, pc, r1
9588  // str r1, [$jbuf, #+4] ; &jbuf[1]
9589  Register NewVReg1 = MRI->createVirtualRegister(TRC);
9590  BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
9591  .addConstantPoolIndex(CPI)
9592  .addImm(0)
9593  .addMemOperand(CPMMO)
9594  .add(predOps(ARMCC::AL));
9595  Register NewVReg2 = MRI->createVirtualRegister(TRC);
9596  BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
9597  .addReg(NewVReg1, RegState::Kill)
9598  .addImm(PCLabelId)
9599  .add(predOps(ARMCC::AL));
9600  BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
9601  .addReg(NewVReg2, RegState::Kill)
9602  .addFrameIndex(FI)
9603  .addImm(36) // &jbuf[1] :: pc
9604  .addMemOperand(FIMMOSt)
9605  .add(predOps(ARMCC::AL));
9606  }
9607 }
9608 
9609 void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
9610  MachineBasicBlock *MBB) const {
9611  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
9612  DebugLoc dl = MI.getDebugLoc();
9613  MachineFunction *MF = MBB->getParent();
9614  MachineRegisterInfo *MRI = &MF->getRegInfo();
9615  MachineFrameInfo &MFI = MF->getFrameInfo();
9616  int FI = MFI.getFunctionContextIndex();
9617 
9618  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
9619  : &ARM::GPRnopcRegClass;
9620 
9621  // Get a mapping of the call site numbers to all of the landing pads they're
9622  // associated with.
9623  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
9624  unsigned MaxCSNum = 0;
9625  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
9626  ++BB) {
9627  if (!BB->isEHPad()) continue;
9628 
9629  // FIXME: We should assert that the EH_LABEL is the first MI in the landing
9630  // pad.
9631  for (MachineBasicBlock::iterator
9632  II = BB->begin(), IE = BB->end(); II != IE; ++II) {
9633  if (!II->isEHLabel()) continue;
9634 
9635  MCSymbol *Sym = II->getOperand(0).getMCSymbol();
9636  if (!MF->hasCallSiteLandingPad(Sym)) continue;
9637 
9638  SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
9639  for (SmallVectorImpl<unsigned>::iterator
9640  CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
9641  CSI != CSE; ++CSI) {
9642  CallSiteNumToLPad[*CSI].push_back(&*BB);
9643  MaxCSNum = std::max(MaxCSNum, *CSI);
9644  }
9645  break;
9646  }
9647  }
9648 
9649  // Get an ordered list of the machine basic blocks for the jump table.
9650  std::vector<MachineBasicBlock*> LPadList;
9651  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
9652  LPadList.reserve(CallSiteNumToLPad.size());
9653  for (unsigned I = 1; I <= MaxCSNum; ++I) {
9654  SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
9655  for (SmallVectorImpl<MachineBasicBlock*>::iterator
9656  II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
9657  LPadList.push_back(*II);
9658  InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
9659  }
9660  }
9661 
9662  assert(!LPadList.empty() &&
9663  "No landing pad destinations for the dispatch jump table!");
9664 
9665  // Create the jump table and associated information.
9666  MachineJumpTableInfo *JTI =
9667  MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
9668  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
9669 
9670  // Create the MBBs for the dispatch code.
9671 
9672  // Shove the dispatch's address into the return slot in the function context.
9673  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
9674  DispatchBB->setIsEHPad();
9675 
9676  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
9677  unsigned trap_opcode;
9678  if (Subtarget->isThumb())
9679  trap_opcode = ARM::tTRAP;
9680  else
9681  trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
9682 
9683  BuildMI(TrapBB, dl, TII->get(trap_opcode));
9684  DispatchBB->addSuccessor(TrapBB);
9685 
9686  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
9687  DispatchBB->addSuccessor(DispContBB);
9688 
9689  // Insert the MBBs into the function.
9690  MF->insert(MF->end(), DispatchBB);
9691  MF->insert(MF->end(), DispContBB);
9692  MF->insert(MF->end(), TrapBB);
9693 
9694  // Insert code into the entry block that creates and registers the function
9695  // context.
9696  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
9697 
9698  MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
9699  MachinePointerInfo::getFixedStack(*MF, FI),
9700  MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
9701 
9702  MachineInstrBuilder MIB;
9703  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
9704 
9705  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
9706  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
9707 
9708  // Add a register mask with no preserved registers. This results in all
9709  // registers being marked as clobbered. This can't work if the dispatch block
9710  // is in a Thumb1 function and is linked with ARM code which uses the FP
9711  // registers, as there is no way to preserve the FP registers in Thumb1 mode.
9712  MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
9713 
9714  bool IsPositionIndependent = isPositionIndependent();
9715  unsigned NumLPads = LPadList.size();
9716  if (Subtarget->isThumb2()) {
9717  Register NewVReg1 = MRI->createVirtualRegister(TRC);
9718  BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
9719  .addFrameIndex(FI)
9720  .addImm(4)
9721  .addMemOperand(FIMMOLd)
9722  .add(predOps(ARMCC::AL));
9723 
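 // A landing-pad count below 256 fits directly in the compare's immediate
 // field; larger counts must first be materialized into a register.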
9724  if (NumLPads < 256) {
9725  BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
9726  .addReg(NewVReg1)
9727  .addImm(LPadList.size())
9728  .add(predOps(ARMCC::AL));
9729  } else {
9730  Register VReg1 = MRI->createVirtualRegister(TRC);
9731  BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
9732  .addImm(NumLPads & 0xFFFF)
9733  .add(predOps(ARMCC::AL));
9734 
9735  unsigned VReg2 = VReg1;
9736  if ((NumLPads & 0xFFFF0000) != 0) {
9737  VReg2 = MRI->createVirtualRegister(TRC);
9738  BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
9739  .addReg(VReg1)
9740  .addImm(NumLPads >> 16)
9741  .add(predOps(ARMCC::AL));
9742  }
9743 
9744  BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
9745  .addReg(NewVReg1)
9746  .addReg(VReg2)
9747  .add(predOps(ARMCC::AL));
9748  }
9749 
9750  BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
9751  .addMBB(TrapBB)
9752  .addImm(ARMCC::HI)
9753  .addReg(ARM::CPSR);
9754 
9755  Register NewVReg3 = MRI->createVirtualRegister(TRC);
9756  BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
9757  .addJumpTableIndex(MJTI)
9758  .add(predOps(ARMCC::AL));
9759 
9760  Register NewVReg4 = MRI->createVirtualRegister(TRC);
9761  BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
9762  .addReg(NewVReg3, RegState::Kill)
9763  .addReg(NewVReg1)
9764  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
9765  .add(predOps(ARMCC::AL))
9766  .add(condCodeOp());
9767 
9768  BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
9769  .addReg(NewVReg4, RegState::Kill)
9770  .addReg(NewVReg1)
9771  .addJumpTableIndex(MJTI);
9772  } else if (Subtarget->isThumb()) {
9773  Register NewVReg1 = MRI->createVirtualRegister(TRC);
9774  BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
9775  .addFrameIndex(FI)
9776  .addImm(1)
9777  .addMemOperand(FIMMOLd)
9778  .add(predOps(ARMCC::AL));
9779 
9780  if (NumLPads < 256) {
9781  BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
9782  .addReg(NewVReg1)
9783  .addImm(NumLPads)
9784  .add(predOps(ARMCC::AL));
9785  } else {
9786  MachineConstantPool *ConstantPool = MF->getConstantPool();
9787  Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
9788  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
9789 
9790  // MachineConstantPool wants an explicit alignment.
9791  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
9792  if (Align == 0)
9793  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
9794  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
9795 
9796  Register VReg1 = MRI->createVirtualRegister(TRC);
9797  BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
9798  .addReg(VReg1, RegState::Define)
9799  .addConstantPoolIndex(Idx)
9800  .add(predOps(ARMCC::AL));
9801  BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
9802  .addReg(NewVReg1)
9803  .addReg(VReg1)
9804  .add(predOps(ARMCC::AL));
9805  }
9806 
9807  BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
9808  .addMBB(TrapBB)
9809  .addImm(ARMCC::HI)
9810  .addReg(ARM::CPSR);
9811 
9812  Register NewVReg2 = MRI->createVirtualRegister(TRC);
9813  BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
9814  .addReg(ARM::CPSR, RegState::Define)
9815  .addReg(NewVReg1)
9816  .addImm(2)
9817  .add(predOps(ARMCC::AL));
9818 
9819  Register NewVReg3 = MRI->createVirtualRegister(TRC);
9820  BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
9821  .addJumpTableIndex(MJTI)
9822  .add(predOps(ARMCC::AL));
9823 
9824  Register NewVReg4 = MRI->createVirtualRegister(TRC);
9825  BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
9826  .addReg(ARM::CPSR, RegState::Define)
9827  .addReg(NewVReg2, RegState::Kill)
9828  .addReg(NewVReg3)
9829  .add(predOps(ARMCC::AL));
9830 
9831  MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
9832  MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
9833 
9834  Register NewVReg5 = MRI->createVirtualRegister(TRC);
9835  BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
9836  .addReg(NewVReg4, RegState::Kill)
9837  .addImm(0)
9838  .addMemOperand(JTMMOLd)
9839  .add(predOps(ARMCC::AL));
9840 
9841  unsigned NewVReg6 = NewVReg5;
9842  if (IsPositionIndependent) {
9843  NewVReg6 = MRI->createVirtualRegister(TRC);
9844  BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
9845  .addReg(ARM::CPSR, RegState::Define)
9846  .addReg(NewVReg5, RegState::Kill)
9847  .addReg(NewVReg3)
9848  .add(predOps(ARMCC::AL));
9849  }
9850 
9851  BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
9852  .addReg(NewVReg6, RegState::Kill)
9853  .addJumpTableIndex(MJTI);
9854  } else {
9855  Register NewVReg1 = MRI->createVirtualRegister(TRC);
9856  BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
9857  .addFrameIndex(FI)
9858  .addImm(4)
9859  .addMemOperand(FIMMOLd)
9860  .add(predOps(ARMCC::AL));
9861 
9862  if (NumLPads < 256) {
9863  BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
9864  .addReg(NewVReg1)
9865  .addImm(NumLPads)
9866  .add(predOps(ARMCC::AL));
9867  } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
9868  Register VReg1 = MRI->createVirtualRegister(TRC);
9869  BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
9870  .addImm(NumLPads & 0xFFFF)
9871  .add(predOps(ARMCC::AL));
9872 
9873  unsigned VReg2 = VReg1;
9874  if ((NumLPads & 0xFFFF0000) != 0) {
9875  VReg2 = MRI->createVirtualRegister(TRC);
9876  BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
9877  .addReg(VReg1)
9878  .addImm(NumLPads >> 16)
9879  .add(predOps(ARMCC::AL));
9880  }
9881 
9882  BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
9883  .addReg(NewVReg1)
9884  .addReg(VReg2)
9885  .add(predOps(ARMCC::AL));
9886  } else {
9887  MachineConstantPool *ConstantPool = MF->getConstantPool();
9888  Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
9889  const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
9890 
9891  // MachineConstantPool wants an explicit alignment.
9892  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
9893  if (Align == 0)
9894  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
9895  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
9896 
9897  Register VReg1 = MRI->createVirtualRegister(TRC);
9898  BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
9899  .addReg(VReg1, RegState::Define)
9900  .addConstantPoolIndex(Idx)
9901  .addImm(0)
9902  .add(predOps(ARMCC::AL));
9903  BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
9904  .addReg(NewVReg1)
9905  .addReg(VReg1, RegState::Kill)
9906  .add(predOps(ARMCC::AL));
9907  }
9908 
9909  BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
9910  .addMBB(TrapBB)
9911  .addImm(ARMCC::HI)
9912  .addReg(ARM::CPSR);
9913 
9914  Register NewVReg3 = MRI->createVirtualRegister(TRC);
9915  BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
9916  .addReg(NewVReg1)
9917  .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
9918  .add(predOps(ARMCC::AL))
9919  .add(condCodeOp());
9920  Register NewVReg4 = MRI->createVirtualRegister(TRC);
9921  BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
9922  .addJumpTableIndex(MJTI)
9923  .add(predOps(ARMCC::AL));
9924 
9925  MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
9926  MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
9927  Register NewVReg5 = MRI->createVirtualRegister(TRC);
9928  BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
9929  .addReg(NewVReg3, RegState::Kill)
9930  .addReg(NewVReg4)
9931  .addImm(0)
9932  .addMemOperand(JTMMOLd)
9933  .add(predOps(ARMCC::AL));
9934 
9935  if (IsPositionIndependent) {
9936  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
9937  .addReg(NewVReg5, RegState::Kill)
9938  .addReg(NewVReg4)
9939  .addJumpTableIndex(MJTI);
9940  } else {
9941  BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
9942  .addReg(NewVReg5, RegState::Kill)
9943  .addJumpTableIndex(MJTI);
9944  }
9945  }
9946 
9947  // Add the jump table entries as successors to the MBB.
9948  SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
9949  for (std::vector<MachineBasicBlock*>::iterator
9950  I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
9951  MachineBasicBlock *CurMBB = *I;
9952  if (SeenMBBs.insert(CurMBB).second)
9953  DispContBB->addSuccessor(CurMBB);
9954  }
9955 
9956  // N.B. the order the invoke BBs are processed in doesn't matter here.
9957  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
9958  SmallVector<MachineBasicBlock*, 64> MBBLPads;
9959  for (MachineBasicBlock *BB : InvokeBBs) {
9960 
9961  // Remove the landing pad successor from the invoke block and replace it
9962  // with the new dispatch block.
9963  SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
9964  BB->succ_end());
9965  while (!Successors.empty()) {
9966  MachineBasicBlock *SMBB = Successors.pop_back_val();
9967  if (SMBB->isEHPad()) {
9968  BB->removeSuccessor(SMBB);
9969  MBBLPads.push_back(SMBB);
9970  }
9971  }
9972 
9973  BB->addSuccessor(DispatchBB, BranchProbability::getZero());
9974  BB->normalizeSuccProbs();
9975 
9976  // Find the invoke call and mark all of the callee-saved registers as
9977  // 'implicit defined' so that they're spilled. This prevents code from
9978  // moving instructions to before the EH block, where they will never be
9979  // executed.
9980  for (MachineBasicBlock::reverse_iterator
9981  II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
9982  if (!II->isCall()) continue;
9983 
9984  DenseMap<unsigned, bool> DefRegs;
9985  for (MachineInstr::mop_iterator
9986  OI = II->operands_begin(), OE = II->operands_end();
9987  OI != OE; ++OI) {
9988  if (!OI->isReg()) continue;
9989  DefRegs[OI->getReg()] = true;
9990  }
9991 
9992  MachineInstrBuilder MIB(*MF, &*II);
9993 
9994  for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
9995  unsigned Reg = SavedRegs[i];
9996  if (Subtarget->isThumb2() &&
9997  !ARM::tGPRRegClass.contains(Reg) &&
9998  !ARM::hGPRRegClass.contains(Reg))
9999  continue;
10000  if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
10001  continue;
10002  if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
10003  continue;
10004  if (!DefRegs[Reg])
10005  MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
10006  }
10007 
10008  break;
10009  }
10010  }
10011 
10012  // Mark all former landing pads as non-landing pads. The dispatch is the only
10013  // landing pad now.
10014  for (SmallVectorImpl<MachineBasicBlock*>::iterator
10015  I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
10016  (*I)->setIsEHPad(false);
10017 
10018  // The instruction is gone now.
10019  MI.eraseFromParent();
10020 }
10021 
10022 static
10023 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
10024  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
10025  E = MBB->succ_end(); I != E; ++I)
10026  if (*I != Succ)
10027  return *I;
10028  llvm_unreachable("Expecting a BB with two successors!");
10029 }
10030 
10031 /// Return the load opcode for a given load size. If the load size is >= 8,
10032 /// a NEON opcode is returned.
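/// For example, getLdOpcode(4, /*IsThumb1=*/false, /*IsThumb2=*/true) yields
/// ARM::t2LDR_POST, the post-incrementing Thumb-2 word load.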
10033 static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
10034  if (LdSize >= 8)
10035  return LdSize == 16 ? ARM::VLD1q32wb_fixed
10036  : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
10037  if (IsThumb1)
10038  return LdSize == 4 ? ARM::tLDRi
10039  : LdSize == 2 ? ARM::tLDRHi
10040  : LdSize == 1 ? ARM::tLDRBi : 0;
10041  if (IsThumb2)
10042  return LdSize == 4 ? ARM::t2LDR_POST
10043  : LdSize == 2 ? ARM::t2LDRH_POST
10044  : LdSize == 1 ? ARM::t2LDRB_POST : 0;
10045  return LdSize == 4 ? ARM::LDR_POST_IMM
10046  : LdSize == 2 ? ARM::LDRH_POST
10047  : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
10048 }
10049 
10050 /// Return the store opcode for a given store size. If the store size is >= 8,
10051 /// a NEON opcode is returned.
10052 static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
10053  if (StSize >= 8)
10054  return StSize == 16 ? ARM::VST1q32wb_fixed
10055  : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
10056  if (IsThumb1)
10057  return StSize == 4 ? ARM::tSTRi
10058  : StSize == 2 ? ARM::tSTRHi
10059  : StSize == 1 ? ARM::tSTRBi : 0;
10060  if (IsThumb2)
10061  return StSize == 4 ? ARM::t2STR_POST
10062  : StSize == 2 ? ARM::t2STRH_POST
10063  : StSize == 1 ? ARM::t2STRB_POST : 0;
10064  return StSize == 4 ? ARM::STR_POST_IMM
10065  : StSize == 2 ? ARM::STRH_POST
10066  : StSize == 1 ? ARM::STRB_POST_IMM : 0;
10067 }
10068 
10069 /// Emit a post-increment load operation with given size. The instructions
10070 /// will be added to BB at Pos.
10071 static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
10072  const TargetInstrInfo *TII, const DebugLoc &dl,
10073  unsigned LdSize, unsigned Data, unsigned AddrIn,
10074  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
10075  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
10076  assert(LdOpc != 0 && "Should have a load opcode");
10077  if (LdSize >= 8) {
10078  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
10079  .addReg(AddrOut, RegState::Define)
10080  .addReg(AddrIn)
10081  .addImm(0)
10082  .add(predOps(ARMCC::AL));
10083  } else if (IsThumb1) {
10084  // load + update AddrIn
10085  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
10086  .addReg(AddrIn)
10087  .addImm(0)
10088  .add(predOps(ARMCC::AL));
10089  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
10090  .add(t1CondCodeOp())
10091  .addReg(AddrIn)
10092  .addImm(LdSize)
10093  .add(predOps(ARMCC::AL));
10094  } else if (IsThumb2) {
10095  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
10096  .addReg(AddrOut, RegState::Define)
10097  .addReg(AddrIn)
10098  .addImm(LdSize)
10099  .add(predOps(ARMCC::AL));
10100  } else { // arm
10101  BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
10102  .addReg(AddrOut, RegState::Define)
10103  .addReg(AddrIn)
10104  .addReg(0)
10105  .addImm(LdSize)
10106  .add(predOps(ARMCC::AL));
10107  }
10108 }
10109 
10110 /// Emit a post-increment store operation with given size. The instructions
10111 /// will be added to BB at Pos.
10112 static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
10113  const TargetInstrInfo *TII, const DebugLoc &dl,
10114  unsigned StSize, unsigned Data, unsigned AddrIn,
10115  unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
10116  unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
10117  assert(StOpc != 0 && "Should have a store opcode");
10118  if (StSize >= 8) {
10119  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
10120  .addReg(AddrIn)
10121  .addImm(0)
10122  .addReg(Data)
10123  .add(predOps(ARMCC::AL));
10124  } else if (IsThumb1) {
10125  // store + update AddrIn
10126  BuildMI(*BB, Pos, dl, TII->get(StOpc))
10127  .addReg(Data)
10128  .addReg(AddrIn)
10129  .addImm(0)
10130  .add(predOps(ARMCC::AL));
10131  BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
10132  .add(t1CondCodeOp())
10133  .addReg(AddrIn)
10134  .addImm(StSize)
10135  .add(predOps(ARMCC::AL));
10136  } else if (IsThumb2) {
10137  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
10138  .addReg(Data)
10139  .addReg(AddrIn)
10140  .addImm(StSize)
10141  .add(predOps(ARMCC::AL));
10142  } else { // arm
10143  BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
10144  .addReg(Data)
10145  .addReg(AddrIn)
10146  .addReg(0)
10147  .addImm(StSize)
10148  .add(predOps(ARMCC::AL));
10149  }
10150 }
10151 
10152 MachineBasicBlock *
10153 ARMTargetLowering::EmitStructByval(MachineInstr &MI,
10154  MachineBasicBlock *BB) const {
10155  // This pseudo instruction has 3 operands: dst, src, size
10156  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
10157  // Otherwise, we will generate unrolled scalar copies.
10158  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10159  const BasicBlock *LLVM_BB = BB->getBasicBlock();
10160  MachineFunction::iterator It = ++BB->getIterator();
10161 
10162  Register dest = MI.getOperand(0).getReg();
10163  Register src = MI.getOperand(1).getReg();
10164  unsigned SizeVal = MI.getOperand(2).getImm();
10165  unsigned Align = MI.getOperand(3).getImm();
10166  DebugLoc dl = MI.getDebugLoc();
10167 
10168  MachineFunction *MF = BB->getParent();
10169  MachineRegisterInfo &MRI = MF->getRegInfo();
10170  unsigned UnitSize = 0;
10171  const TargetRegisterClass *TRC = nullptr;
10172  const TargetRegisterClass *VecTRC = nullptr;
10173 
10174  bool IsThumb1 = Subtarget->isThumb1Only();
10175  bool IsThumb2 = Subtarget->isThumb2();
10176  bool IsThumb = Subtarget->isThumb();
10177 
10178  if (Align & 1) {
10179  UnitSize = 1;
10180  } else if (Align & 2) {
10181  UnitSize = 2;
10182  } else {
10183  // Check whether we can use NEON instructions.
10184  if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
10185  Subtarget->hasNEON()) {
10186  if ((Align % 16 == 0) && SizeVal >= 16)
10187  UnitSize = 16;
10188  else if ((Align % 8 == 0) && SizeVal >= 8)
10189  UnitSize = 8;
10190  }
10191  // Can't use NEON instructions.
10192  if (UnitSize == 0)
10193  UnitSize = 4;
10194  }
10195 
10196  // Select the correct opcode and register class for unit size load/store
10197  bool IsNeon = UnitSize >= 8;
10198  TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
10199  if (IsNeon)
10200  VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
10201  : UnitSize == 8 ? &ARM::DPRRegClass
10202  : nullptr;
10203 
10204  unsigned BytesLeft = SizeVal % UnitSize;
10205  unsigned LoopSize = SizeVal - BytesLeft;
10206 
10207  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
10208  // Use LDR and STR to copy.
10209  // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
10210  // [destOut] = STR_POST(scratch, destIn, UnitSize)
10211  unsigned srcIn = src;
10212  unsigned destIn = dest;
10213  for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
10214  Register srcOut = MRI.createVirtualRegister(TRC);
10215  Register destOut = MRI.createVirtualRegister(TRC);
10216  Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
10217  emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
10218  IsThumb1, IsThumb2);
10219  emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
10220  IsThumb1, IsThumb2);
10221  srcIn = srcOut;
10222  destIn = destOut;
10223  }
10224 
10225  // Handle the leftover bytes with LDRB and STRB.
10226  // [scratch, srcOut] = LDRB_POST(srcIn, 1)
10227  // [destOut] = STRB_POST(scratch, destIn, 1)
10228  for (unsigned i = 0; i < BytesLeft; i++) {
10229  Register srcOut = MRI.createVirtualRegister(TRC);
10230  Register destOut = MRI.createVirtualRegister(TRC);
10231  Register scratch = MRI.createVirtualRegister(TRC);
10232  emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
10233  IsThumb1, IsThumb2);
10234  emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
10235  IsThumb1, IsThumb2);
10236  srcIn = srcOut;
10237  destIn = destOut;
10238  }
10239  MI.eraseFromParent(); // The instruction is gone now.
10240  return BB;
10241  }
10242 
10243  // Expand the pseudo op to a loop.
10244  // thisMBB:
10245  // ...
10246  // movw varEnd, # --> with thumb2
10247  // movt varEnd, #
10248  // ldrcp varEnd, idx --> without thumb2
10249  // fallthrough --> loopMBB
10250  // loopMBB:
10251  // PHI varPhi, varEnd, varLoop
10252  // PHI srcPhi, src, srcLoop
10253  // PHI destPhi, dst, destLoop
10254  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
10255  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
10256  // subs varLoop, varPhi, #UnitSize
10257  // bne loopMBB
10258  // fallthrough --> exitMBB
10259  // exitMBB:
10260  // epilogue to handle left-over bytes
10261  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
10262  // [destOut] = STRB_POST(scratch, destLoop, 1)
10263  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
10264  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
10265  MF->insert(It, loopMBB);
10266  MF->insert(It, exitMBB);
10267 
10268  // Transfer the remainder of BB and its successor edges to exitMBB.
10269  exitMBB->splice(exitMBB->begin(), BB,
10270  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10271  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10272 
10273  // Load an immediate to varEnd.
10274  Register varEnd = MRI.createVirtualRegister(TRC);
10275  if (Subtarget->useMovt()) {
10276  unsigned Vtmp = varEnd;
10277  if ((LoopSize & 0xFFFF0000) != 0)
10278  Vtmp = MRI.createVirtualRegister(TRC);
10279  BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
10280  .addImm(LoopSize & 0xFFFF)
10281  .add(predOps(ARMCC::AL));
10282 
10283  if ((LoopSize & 0xFFFF0000) != 0)
10284  BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
10285  .addReg(Vtmp)
10286  .addImm(LoopSize >> 16)
10287  .add(predOps(ARMCC::AL));
10288  } else {
10289  MachineConstantPool *ConstantPool = MF->getConstantPool();
10290  Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10291  const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
10292 
10293  // MachineConstantPool wants an explicit alignment.
10294  unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
10295  if (Align == 0)
10296  Align = MF->getDataLayout().getTypeAllocSize(C->getType());
10297  unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
10298  MachineMemOperand *CPMMO =
10299  MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10300  MachineMemOperand::MOLoad, 4, 4);
10301 
10302  if (IsThumb)
10303  BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
10304  .addReg(varEnd, RegState::Define)
10305  .addConstantPoolIndex(Idx)
10306  .add(predOps(ARMCC::AL))
10307  .addMemOperand(CPMMO);
10308  else
10309  BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
10310  .addReg(varEnd, RegState::Define)
10311  .addConstantPoolIndex(Idx)
10312  .addImm(0)
10313  .add(predOps(ARMCC::AL))
10314  .addMemOperand(CPMMO);
10315  }
10316  BB->addSuccessor(loopMBB);
10317 
10318  // Generate the loop body:
10319  // varPhi = PHI(varLoop, varEnd)
10320  // srcPhi = PHI(srcLoop, src)
10321  // destPhi = PHI(destLoop, dst)
10322  MachineBasicBlock *entryBB = BB;
10323  BB = loopMBB;
10324  Register varLoop = MRI.createVirtualRegister(TRC);
10325  Register varPhi = MRI.createVirtualRegister(TRC);
10326  Register srcLoop = MRI.createVirtualRegister(TRC);
10327  Register srcPhi = MRI.createVirtualRegister(TRC);
10328  Register destLoop = MRI.createVirtualRegister(TRC);
10329  Register destPhi = MRI.createVirtualRegister(TRC);
10330 
10331  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
10332  .addReg(varLoop).addMBB(loopMBB)
10333  .addReg(varEnd).addMBB(entryBB);
10334  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
10335  .addReg(srcLoop).addMBB(loopMBB)
10336  .addReg(src).addMBB(entryBB);
10337  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
10338  .addReg(destLoop).addMBB(loopMBB)
10339  .addReg(dest).addMBB(entryBB);
10340 
10341  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
10342  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
10343  Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
10344  emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
10345  IsThumb1, IsThumb2);
10346  emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
10347  IsThumb1, IsThumb2);
10348 
10349  // Decrement loop variable by UnitSize.
10350  if (IsThumb1) {
10351  BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
10352  .add(t1CondCodeOp())
10353  .addReg(varPhi)
10354  .addImm(UnitSize)
10355  .add(predOps(ARMCC::AL));
10356  } else {
10357  MachineInstrBuilder MIB =
10358  BuildMI(*BB, BB->end(), dl,
10359  TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
10360  MIB.addReg(varPhi)
10361  .addImm(UnitSize)
10362  .add(predOps(ARMCC::AL))
10363  .add(condCodeOp());
10364  MIB->getOperand(5).setReg(ARM::CPSR);
10365  MIB->getOperand(5).setIsDef(true);
10366  }
10367  BuildMI(*BB, BB->end(), dl,
10368  TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
10369  .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
10370 
10371  // loopMBB can loop back to loopMBB or fall through to exitMBB.
10372  BB->addSuccessor(loopMBB);
10373  BB->addSuccessor(exitMBB);
10374 
10375  // Add epilogue to handle BytesLeft.
10376  BB = exitMBB;
10377  auto StartOfExit = exitMBB->begin();
10378 
10379  // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
10380  // [destOut] = STRB_POST(scratch, destLoop, 1)
10381  unsigned srcIn = srcLoop;
10382  unsigned destIn = destLoop;
10383  for (unsigned i = 0; i < BytesLeft; i++) {
10384  Register srcOut = MRI.createVirtualRegister(TRC);
10385  Register destOut = MRI.createVirtualRegister(TRC);
10386  Register scratch = MRI.createVirtualRegister(TRC);
10387  emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
10388  IsThumb1, IsThumb2);
10389  emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
10390  IsThumb1, IsThumb2);
10391  srcIn = srcOut;
10392  destIn = destOut;
10393  }
10394 
10395  MI.eraseFromParent(); // The instruction is gone now.
10396  return BB;
10397 }
10398 
10399 MachineBasicBlock *
10400 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
10401  MachineBasicBlock *MBB) const {
10402  const TargetMachine &TM = getTargetMachine();
10403  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
10404  DebugLoc DL = MI.getDebugLoc();
10405 
10406  assert(Subtarget->isTargetWindows() &&
10407  "__chkstk is only supported on Windows");
10408  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
10409 
10410  // __chkstk takes the number of words to allocate on the stack in R4, and
10411  // returns the stack adjustment in number of bytes in R4. This will not
10412  // clobber any other registers (other than the obvious lr).
10413  //
10414  // Although, technically, IP should be considered a register which may be
10415  // clobbered, the call itself will not touch it. Windows on ARM is a pure
10416  // thumb-2 environment, so there is no interworking required. As a result, we
10417  // do not expect a veneer to be emitted by the linker, clobbering IP.
10418  //
10419  // Each module receives its own copy of __chkstk, so no import thunk is
10420  // required, again, ensuring that IP is not clobbered.
10421  //
10422  // Finally, although some linkers may theoretically provide a trampoline for
10423  // out of range calls (which is quite common due to a 32M range limitation of
10424  // branches for Thumb), we can generate the long-call version via
10425  // -mcmodel=large, alleviating the need for the trampoline which may clobber
10426  // IP.
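 // The emitted sequence is therefore roughly:
 //   bl __chkstk                                  (small/medium/kernel models)
 //   movw/movt <scratch>, __chkstk; blx <scratch> (large code model)
 //   sub.w sp, sp, r4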
10427 
10428  switch (TM.getCodeModel()) {
10429  case CodeModel::Tiny:
10430  llvm_unreachable("Tiny code model not available on ARM.");
10431  case CodeModel::Small:
10432  case CodeModel::Medium:
10433  case CodeModel::Kernel:
10434  BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
10435  .add(predOps(ARMCC::AL))
10436  .addExternalSymbol("__chkstk")
10437  .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
10438  .addReg(ARM::R4, RegState::Implicit | RegState::Define)
10439  .addReg(ARM::R12,
10440  RegState::Implicit | RegState::Define | RegState::Dead)
10441  .addReg(ARM::CPSR,
10442  RegState::Implicit | RegState::Define | RegState::Dead);
10443  break;
10444  case CodeModel::Large: {
10445  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
10446  Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
10447 
10448  BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
10449  .addExternalSymbol("__chkstk");
10450  BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
10451  .add(predOps(ARMCC::AL))
10452  .addReg(Reg, RegState::Kill)
10453  .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
10454  .addReg(ARM::R4, RegState::Implicit | RegState::Define)
10455  .addReg(ARM::R12,
10456  RegState::Implicit | RegState::Define | RegState::Dead)
10457  .addReg(ARM::CPSR,
10458  RegState::Implicit | RegState::Define | RegState::Dead);
10459  break;
10460  }
10461  }
10462 
10463  BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
10464  .addReg(ARM::SP, RegState::Kill)
10465  .addReg(ARM::R4, RegState::Kill)
10466  .setMIFlags(MachineInstr::FrameSetup)
10467  .add(predOps(ARMCC::AL))
10468  .add(condCodeOp());
10469 
10470  MI.eraseFromParent();
10471  return MBB;
10472 }
10473 
10474 MachineBasicBlock *
10475 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
10476  MachineBasicBlock *MBB) const {
10477  DebugLoc DL = MI.getDebugLoc();
10478  MachineFunction *MF = MBB->getParent();
10479  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10480 
10481  MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
10482  MF->insert(++MBB->getIterator(), ContBB);
10483  ContBB->splice(ContBB->begin(), MBB,
10484  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10485  ContBB->transferSuccessorsAndUpdatePHIs(MBB);
10486  MBB->addSuccessor(ContBB);
10487 
10488  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10489  BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
10490  MF->push_back(TrapBB);
10491  MBB->addSuccessor(TrapBB);
10492 
10493  BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
10494  .addReg(MI.getOperand(0).getReg())
10495  .addImm(0)
10496  .add(predOps(ARMCC::AL));
10497  BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
10498  .addMBB(TrapBB)
10499  .addImm(ARMCC::EQ)
10500  .addReg(ARM::CPSR);
10501 
10502  MI.eraseFromParent();
10503  return ContBB;
10504 }
10505 
10506 // The CPSR operand of SelectItr might be missing a kill marker
10507 // because there were multiple uses of CPSR, and ISel didn't know
10508 // which to mark. Figure out whether SelectItr should have had a
10509 // kill marker, and set it if it should. Returns the correct kill
10510 // marker value.
10511 static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
10512  MachineBasicBlock* BB,
10513  const TargetRegisterInfo* TRI) {
10514  // Scan forward through BB for a use/def of CPSR.
10515  MachineBasicBlock::iterator miI(std::next(SelectItr));
10516  for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
10517  const MachineInstr& mi = *miI;
10518  if (mi.readsRegister(ARM::CPSR))
10519  return false;
10520  if (mi.definesRegister(ARM::CPSR))
10521  break; // Should have kill-flag - update below.
10522  }
10523 
10524  // If we hit the end of the block, check whether CPSR is live into a
10525  // successor.
10526  if (miI == BB->end()) {
10527  for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
10528  sEnd = BB->succ_end();
10529  sItr != sEnd; ++sItr) {
10530  MachineBasicBlock* succ = *sItr;
10531  if (succ->isLiveIn(ARM::CPSR))
10532  return false;
10533  }
10534  }
10535 
10536  // We found a def, or hit the end of the basic block and CPSR wasn't live
10537  // out. SelectMI should have a kill flag on CPSR.
10538  SelectItr->addRegisterKilled(ARM::CPSR, TRI);
10539  return true;
10540 }
10541 
10542 MachineBasicBlock *
10543 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
10544  MachineBasicBlock *BB) const {
10545  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10546  DebugLoc dl = MI.getDebugLoc();
10547  bool isThumb2 = Subtarget->isThumb2();
10548  switch (MI.getOpcode()) {
10549  default: {
10550  MI.print(errs());
10551  llvm_unreachable("Unexpected instr type to insert");
10552  }
10553 
10554  // Thumb1 post-indexed loads are really just single-register LDMs.
10555  case ARM::tLDR_postidx: {
10556  MachineOperand Def(MI.getOperand(1));
10557  BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
10558  .add(Def) // Rn_wb
10559  .add(MI.getOperand(2)) // Rn
10560  .add(MI.getOperand(3)) // PredImm
10561  .add(MI.getOperand(4)) // PredReg
10562  .add(MI.getOperand(0)) // Rt
10563  .cloneMemRefs(MI);
10564  MI.eraseFromParent();
10565  return BB;
10566  }
10567 
10568  // The Thumb2 pre-indexed stores have the same MI operands; they just
10569  // define them differently in the .td files from the isel patterns, so
10570  // they need pseudos.
10571  case ARM::t2STR_preidx:
10572  MI.setDesc(TII->get(ARM::t2STR_PRE));
10573  return BB;
10574  case ARM::t2STRB_preidx:
10575  MI.setDesc(TII->get(ARM::t2STRB_PRE));
10576  return BB;
10577  case ARM::t2STRH_preidx:
10578  MI.setDesc(TII->get(ARM::t2STRH_PRE));
10579  return BB;
10580 
10581  case ARM::STRi_preidx:
10582  case ARM::STRBi_preidx: {
10583  unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
10584  : ARM::STRB_PRE_IMM;
10585  // Decode the offset.
10586  unsigned Offset = MI.getOperand(4).getImm();
10587  bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
10588  Offset = ARM_AM::getAM2Offset(Offset);
10589  if (isSub)
10590  Offset = -Offset;
10591 
10592  MachineMemOperand *MMO = *MI.memoperands_begin();
10593  BuildMI(*BB, MI, dl, TII->get(NewOpc))
10594  .add(MI.getOperand(0)) // Rn_wb
10595  .add(MI.getOperand(1)) // Rt
10596  .add(MI.getOperand(2)) // Rn
10597  .addImm(Offset) // offset (skip GPR==zero_reg)
10598  .add(MI.getOperand(5)) // pred
10599  .add(MI.getOperand(6))
10600  .addMemOperand(MMO);
10601  MI.eraseFromParent();
10602  return BB;
10603  }
10604  case ARM::STRr_preidx:
10605  case ARM::STRBr_preidx:
10606  case ARM::STRH_preidx: {
10607  unsigned NewOpc;
10608  switch (MI.getOpcode()) {
10609  default: llvm_unreachable("unexpected opcode!");
10610  case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
10611  case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
10612  case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
10613  }
10614  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
10615  for (unsigned i = 0; i < MI.getNumOperands(); ++i)
10616  MIB.add(MI.getOperand(i));
10617  MI.eraseFromParent();
10618  return BB;
10619  }
10620 
10621  case ARM::tMOVCCr_pseudo: {
10622  // To "insert" a SELECT_CC instruction, we actually have to insert the
10623  // diamond control-flow pattern. The incoming instruction knows the
10624  // destination vreg to set, the condition code register to branch on, the
10625  // true/false values to select between, and a branch opcode to use.
10626  const BasicBlock *LLVM_BB = BB->getBasicBlock();
10627  MachineFunction::iterator It = ++BB->getIterator();
10628 
10629  // thisMBB:
10630  // ...
10631  // TrueVal = ...
10632  // cmpTY ccX, r1, r2
10633  // bCC copy1MBB
10634  // fallthrough --> copy0MBB
10635  MachineBasicBlock *thisMBB = BB;
10636  MachineFunction *F = BB->getParent();
10637  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
10638  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
10639  F->insert(It, copy0MBB);
10640  F->insert(It, sinkMBB);
10641 
10642  // Check whether CPSR is live past the tMOVCCr_pseudo.
10643  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
10644  if (!MI.killsRegister(ARM::CPSR) &&
10645  !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
10646  copy0MBB->addLiveIn(ARM::CPSR);
10647  sinkMBB->addLiveIn(ARM::CPSR);
10648  }
10649 
10650  // Transfer the remainder of BB and its successor edges to sinkMBB.
10651  sinkMBB->splice(sinkMBB->begin(), BB,
10652  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10653  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
10654 
10655  BB->addSuccessor(copy0MBB);
10656  BB->addSuccessor(sinkMBB);
10657 
10658  BuildMI(BB, dl, TII->get(ARM::tBcc))
10659  .addMBB(sinkMBB)
10660  .addImm(MI.getOperand(3).getImm())
10661  .addReg(MI.getOperand(4).getReg());
10662 
10663  // copy0MBB:
10664  // %FalseValue = ...
10665  // # fallthrough to sinkMBB
10666  BB = copy0MBB;
10667 
10668  // Update machine-CFG edges
10669  BB->addSuccessor(sinkMBB);
10670 
10671  // sinkMBB:
10672  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10673  // ...
10674  BB = sinkMBB;
10675  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
10676  .addReg(MI.getOperand(1).getReg())
10677  .addMBB(copy0MBB)
10678  .addReg(MI.getOperand(2).getReg())
10679  .addMBB(thisMBB);
10680 
10681  MI.eraseFromParent(); // The pseudo instruction is gone now.
10682  return BB;
10683  }
10684 
10685  case ARM::BCCi64:
10686  case ARM::BCCZi64: {
10687  // If there is an unconditional branch to the other successor, remove it.
10688  BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
10689 
10690  // Compare both parts that make up the double comparison separately for
10691  // equality.
10692  bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
10693 
10694  Register LHS1 = MI.getOperand(1).getReg();
10695  Register LHS2 = MI.getOperand(2).getReg();
10696  if (RHSisZero) {
10697  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
10698  .addReg(LHS1)
10699  .addImm(0)
10700  .add(predOps(ARMCC::AL));
10701  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
10702  .addReg(LHS2).addImm(0)
10703  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
10704  } else {
10705  Register RHS1 = MI.getOperand(3).getReg();
10706  Register RHS2 = MI.getOperand(4).getReg();
10707  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
10708  .addReg(LHS1)
10709  .addReg(RHS1)
10710  .add(predOps(ARMCC::AL));
10711  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
10712  .addReg(LHS2).addReg(RHS2)
10713  .addImm(ARMCC::EQ).addReg(ARM::CPSR);
10714  }
10715 
10716  MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
10717  MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
10718  if (MI.getOperand(0).getImm() == ARMCC::NE)
10719  std::swap(destMBB, exitMBB);
10720 
10721  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
10722  .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
10723  if (isThumb2)
10724  BuildMI(BB, dl, TII->get(ARM::t2B))
10725  .addMBB(exitMBB)
10726  .add(predOps(ARMCC::AL));
10727  else
10728  BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
10729 
10730  MI.eraseFromParent(); // The pseudo instruction is gone now.
10731  return BB;
10732  }
10733 
10734  case ARM::Int_eh_sjlj_setjmp:
10735  case ARM::Int_eh_sjlj_setjmp_nofp:
10736  case ARM::tInt_eh_sjlj_setjmp:
10737  case ARM::t2Int_eh_sjlj_setjmp:
10738  case ARM::t2Int_eh_sjlj_setjmp_nofp:
10739  return BB;
10740 
10741  case ARM::Int_eh_sjlj_setup_dispatch:
10742  EmitSjLjDispatchBlock(MI, BB);
10743  return BB;
10744 
10745  case ARM::ABS:
10746  case ARM::t2ABS: {
10747  // To insert an ABS instruction, we have to insert the
10748  // diamond control-flow pattern. The incoming instruction knows the
10749  // source vreg to test against 0, the destination vreg to set,
10750  // the condition code register to branch on, the
10751  // true/false values to select between, and a branch opcode to use.
10752  // It transforms
10753  // V1 = ABS V0
10754  // into
10755  // V2 = MOVS V0
10756  // BCC (branch to SinkBB if V0 >= 0)
10757  // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
10758  // SinkBB: V1 = PHI(V2, V3)
10759  const BasicBlock *LLVM_BB = BB->getBasicBlock();
10760  MachineFunction::iterator BBI = ++BB->getIterator();
10761  MachineFunction *Fn = BB->getParent();
10762  MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
10763  MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
10764  Fn->insert(BBI, RSBBB);
10765  Fn->insert(BBI, SinkBB);
10766 
10767  Register ABSSrcReg = MI.getOperand(1).getReg();
10768  Register ABSDstReg = MI.getOperand(0).getReg();
10769  bool ABSSrcKill = MI.getOperand(1).isKill();
10770  bool isThumb2 = Subtarget->isThumb2();
10771  MachineRegisterInfo &MRI = Fn->getRegInfo();
10772  // In Thumb mode S must not be specified if the source register is the SP
10773  // or PC, or if the destination register is the SP, so restrict the register class.
10774  Register NewRsbDstReg = MRI.createVirtualRegister(
10775  isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
10776 
10777  // Transfer the remainder of BB and its successor edges to sinkMBB.
10778  SinkBB->splice(SinkBB->begin(), BB,
10779  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10780  SinkBB->transferSuccessorsAndUpdatePHIs(BB);
10781 
10782  BB->addSuccessor(RSBBB);
10783  BB->addSuccessor(SinkBB);
10784 
10785  // fall through to SinkMBB
10786  RSBBB->addSuccessor(SinkBB);
10787 
10788  // insert a cmp at the end of BB
10789  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
10790  .addReg(ABSSrcReg)
10791  .addImm(0)
10792  .add(predOps(ARMCC::AL));
10793 
10794  // insert a bcc with opposite CC to ARMCC::MI at the end of BB
10795  BuildMI(BB, dl,
10796  TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
10797  .addImm(ARMCC::MI).addReg(ARM::CPSR);
10798 
10799  // insert rsbri in RSBBB
10800  // Note: BCC and rsbri will be converted into predicated rsbmi
10801  // by if-conversion pass
10802  BuildMI(*RSBBB, RSBBB->begin(), dl,
10803  TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
10804  .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
10805  .addImm(0)
10806  .add(predOps(ARMCC::AL))
10807  .add(condCodeOp());
10808 
10809  // insert PHI in SinkBB,
10810  // reuse ABSDstReg to not change uses of ABS instruction
10811  BuildMI(*SinkBB, SinkBB->begin(), dl,
10812  TII->get(ARM::PHI), ABSDstReg)
10813  .addReg(NewRsbDstReg).addMBB(RSBBB)
10814  .addReg(ABSSrcReg).addMBB(BB);
10815 
10816  // remove ABS instruction
10817  MI.eraseFromParent();
10818 
10819  // return last added BB
10820  return SinkBB;
10821  }
10822  case ARM::COPY_STRUCT_BYVAL_I32:
10823  ++NumLoopByVals;
10824  return EmitStructByval(MI, BB);
10825  case ARM::WIN__CHKSTK:
10826  return EmitLowered__chkstk(MI, BB);
10827  case ARM::WIN__DBZCHK:
10828  return EmitLowered__dbzchk(MI, BB);
10829  }
10830 }
10831 
10832 /// Attaches vregs to MEMCPY that it will use as scratch registers
10833 /// when it is expanded into LDM/STM. This is done as a post-isel lowering
10834 /// instead of as a custom inserter because we need the use list from the SDNode.
10835 static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
10836  MachineInstr &MI, const SDNode *Node) {
10837  bool isThumb1 = Subtarget->isThumb1Only();
10838 
10839  DebugLoc DL = MI.getDebugLoc();
10840  MachineFunction *MF = MI.getParent()->getParent();
10841  MachineRegisterInfo &MRI = MF->getRegInfo();
10842  MachineInstrBuilder MIB(*MF, MI);
10843 
10844  // If the new dst/src is unused, mark it as dead.
10845  if (!Node->hasAnyUseOfValue(0)) {
10846  MI.getOperand(0).setIsDead(true);
10847  }
10848  if (!Node->hasAnyUseOfValue(1)) {
10849  MI.getOperand(1).setIsDead(true);
10850  }
10851 
10852  // The MEMCPY both defines and kills the scratch registers.
10853  for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
10854  Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
10855  : &ARM::GPRRegClass);
10856  MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
10857  }
10858 }
10859 
10860 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
10861  SDNode *Node) const {
10862  if (MI.getOpcode() == ARM::MEMCPY) {
10863  attachMEMCPYScratchRegs(Subtarget, MI, Node);
10864  return;
10865  }
10866 
10867  const MCInstrDesc *MCID = &MI.getDesc();
10868  // After isel, adjust instructions that potentially set the 's' bit (ADC, SBC,
10869  // RSB, RSC). Coming out of isel, they have an implicit CPSR def, but the optional
10870  // operand is still set to noreg. If needed, set the optional operand's
10871  // register to CPSR, and remove the redundant implicit def.
10872  //
10873  // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
10874 
10875  // Rename pseudo opcodes.
10876  unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
10877  unsigned ccOutIdx;
10878  if (NewOpc) {
10879  const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
10880  MCID = &TII->get(NewOpc);
10881 
10882  assert(MCID->getNumOperands() ==
10883  MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
10884  && "converted opcode should be the same except for cc_out"
10885  " (and, on Thumb1, pred)");
10886 
10887  MI.setDesc(*MCID);
10888 
10889  // Add the optional cc_out operand
10890  MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
10891 
10892  // On Thumb1, move all input operands to the end, then add the predicate
10893  if (Subtarget->isThumb1Only()) {
10894  for (unsigned c = MCID->getNumOperands() - 4; c--;) {
10895  MI.addOperand(MI.getOperand(1));
10896  MI.RemoveOperand(1);
10897  }
10898 
10899  // Restore the ties
10900  for (unsigned i = MI.getNumOperands(); i--;) {
10901  const MachineOperand& op = MI.getOperand(i);
10902  if (op.isReg() && op.isUse()) {
10903  int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
10904  if (DefIdx != -1)
10905  MI.tieOperands(DefIdx, i);
10906  }
10907  }
10908 
10909  MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
10910  MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
10911  ccOutIdx = 1;
10912  } else
10913  ccOutIdx = MCID->getNumOperands() - 1;
10914  } else
10915  ccOutIdx = MCID->getNumOperands() - 1;
10916 
10917  // Any ARM instruction that sets the 's' bit should specify an optional
10918  // "cc_out" operand in the last operand position.
10919  if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
10920  assert(!NewOpc && "Optional cc_out operand required");
10921  return;
10922  }
10923  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
10924  // since we already have an optional CPSR def.
10925  bool definesCPSR = false;
10926  bool deadCPSR = false;
10927  for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
10928  ++i) {
10929  const MachineOperand &MO = MI.getOperand(i);
10930  if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
10931  definesCPSR = true;
10932  if (MO.isDead())
10933  deadCPSR = true;
10934  MI.RemoveOperand(i);
10935  break;
10936  }
10937  }
10938  if (!definesCPSR) {
10939  assert(!NewOpc && "Optional cc_out operand required");
10940  return;
10941  }
10942  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
10943  if (deadCPSR) {
10944  assert(!MI.getOperand(ccOutIdx).getReg() &&
10945  "expect uninitialized optional cc_out operand");
10946  // Thumb1 instructions must have the S bit even if the CPSR is dead.
10947  if (!Subtarget->isThumb1Only())
10948  return;
10949  }
10950 
10951  // If this instruction was defined with an optional CPSR def and its dag node
10952  // had a live implicit CPSR def, then activate the optional CPSR def.
10953  MachineOperand &MO = MI.getOperand(ccOutIdx);
10954  MO.setReg(ARM::CPSR);
10955  MO.setIsDef(true);
10956 }
10957 
10958 //===----------------------------------------------------------------------===//
10959 // ARM Optimization Hooks
10960 //===----------------------------------------------------------------------===//
10961 
10962 // Helper function that checks if N is a null or all ones constant.
10963 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
10964  return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
10965 }
10966 
10967 // Return true if N is conditionally 0 or all ones.
10968 // Detects these expressions where cc is an i1 value:
10969 //
10970 // (select cc 0, y) [AllOnes=0]
10971 // (select cc y, 0) [AllOnes=0]
10972 // (zext cc) [AllOnes=0]
10973 // (sext cc) [AllOnes=0/1]
10974 // (select cc -1, y) [AllOnes=1]
10975 // (select cc y, -1) [AllOnes=1]
10976 //
10977 // Invert is set when N is the null/all ones constant when CC is false.
10978 // OtherOp is set to the alternative value of N.
10979 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
10980  SDValue &CC, bool &Invert,
10981  SDValue &OtherOp,
10982  SelectionDAG &DAG) {
10983  switch (N->getOpcode()) {
10984  default: return false;
10985  case ISD::SELECT: {
10986  CC = N->getOperand(0);
10987  SDValue N1 = N->getOperand(1);
10988  SDValue N2 = N->getOperand(2);
10989  if (isZeroOrAllOnes(N1, AllOnes)) {
10990  Invert = false;
10991  OtherOp = N2;
10992  return true;
10993  }
10994  if (isZeroOrAllOnes(N2, AllOnes)) {
10995  Invert = true;
10996  OtherOp = N1;
10997  return true;
10998  }
10999  return false;
11000  }
11001  case ISD::ZERO_EXTEND:
11002  // (zext cc) can never be the all ones value.
11003  if (AllOnes)
11004  return false;
11005  LLVM_FALLTHROUGH;
11006  case ISD::SIGN_EXTEND: {
11007  SDLoc dl(N);
11008  EVT VT = N->getValueType(0);
11009  CC = N->getOperand(0);
11010  if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
11011  return false;
11012  Invert = !AllOnes;
11013  if (AllOnes)
11014  // When looking for an AllOnes constant, N is an sext, and the 'other'
11015  // value is 0.
11016  OtherOp = DAG.getConstant(0, dl, VT);
11017  else if (N->getOpcode() == ISD::ZERO_EXTEND)
11018  // When looking for a 0 constant, N can be zext or sext.
11019  OtherOp = DAG.getConstant(1, dl, VT);
11020  else
11021  OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
11022  VT);
11023  return true;
11024  }
11025  }
11026 }
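
// [Editorial note, not part of the original LLVM source] A scalar reading of
// the extension cases handled above; the helper names are illustrative only.
// A zero- or sign-extended i1 is simply a two-way select on cc, so OtherOp is
// the non-zero (or non-all-ones) alternative value reported by the code above.
static inline int zextAsSelect(bool CC) { return CC ? 1 : 0; }  // (zext cc) == (select cc, 1, 0)
static inline int sextAsSelect(bool CC) { return CC ? -1 : 0; } // (sext cc) == (select cc, -1, 0)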
11027 
11028 // Combine a constant select operand into its use:
11029 //
11030 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
11031 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
11032 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
11033 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
11034 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
11035 //
11036 // The transform is rejected if the select doesn't have a constant operand that
11037 // is null, or all ones when AllOnes is set.
11038 //
11039 // Also recognize sext/zext from i1:
11040 //
11041 // (add (zext cc), x) -> (select cc (add x, 1), x)
11042 // (add (sext cc), x) -> (select cc (add x, -1), x)
11043 //
11044 // These transformations eventually create predicated instructions.
11045 //
11046 // @param N The node to transform.
11047 // @param Slct The N operand that is a select.
11048 // @param OtherOp The other N operand (x above).
11049 // @param DCI Context.
11050 // @param AllOnes Require the select constant to be all ones instead of null.
11051 // @returns The new node, or SDValue() on failure.
11052 static
11053 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
11054  TargetLowering::DAGCombinerInfo &DCI,
11055  bool AllOnes = false) {
11056  SelectionDAG &DAG = DCI.DAG;
11057  EVT VT = N->getValueType(0);
11058  SDValue NonConstantVal;
11059  SDValue CCOp;
11060  bool SwapSelectOps;
11061  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
11062  NonConstantVal, DAG))
11063  return SDValue();
11064 
11065  // Slct is now known to be the desired identity constant when CC is true.
11066  SDValue TrueVal = OtherOp;
11067  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
11068  OtherOp, NonConstantVal);
11069  // Unless SwapSelectOps says CC should be false.
11070  if (SwapSelectOps)
11071  std::swap(TrueVal, FalseVal);
11072 
11073  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
11074  CCOp, TrueVal, FalseVal);
11075 }
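
// [Editorial sketch, not part of the original LLVM source] Scalar model of the
// fold above, (add (select cc, 0, c), x) -> (select cc, x, (add x, c)); the
// helper name is illustrative only.
static inline int addOfSelectModel(bool CC, int C, int X) {
  // When CC is true the select contributes 0 and the sum is X; otherwise it
  // contributes C and the sum is X + C, exactly the rewritten select.
  return CC ? X : (X + C);
}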
11076 
11077 // Attempt combineSelectAndUse on each operand of a commutative operator N.
11078 static
11079 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
11080  TargetLowering::DAGCombinerInfo &DCI) {
11081  SDValue N0 = N->getOperand(0);
11082  SDValue N1 = N->getOperand(1);
11083  if (N0.getNode()->hasOneUse())
11084  if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
11085  return Result;
11086  if (N1.getNode()->hasOneUse())
11087  if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
11088  return Result;
11089  return SDValue();
11090 }
11091 
11092 static bool IsVUZPShuffleNode(SDNode *N) {
11093  // VUZP shuffle node.
11094  if (N->getOpcode() == ARMISD::VUZP)
11095  return true;
11096 
11097  // "VUZP" on i32 is an alias for VTRN.
11098  if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
11099  return true;
11100 
11101  return false;
11102 }
11103 
11104 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
11105  TargetLowering::DAGCombinerInfo &DCI,
11106  const ARMSubtarget *Subtarget) {
11107  // Look for ADD(VUZP.0, VUZP.1).
11108  if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
11109  N0 == N1)
11110  return SDValue();
11111 
11112  // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
11113  if (!N->getValueType(0).is64BitVector())
11114  return SDValue();
11115 
11116  // Generate vpadd.
11117  SelectionDAG &DAG = DCI.DAG;
11118  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11119  SDLoc dl(N);
11120  SDNode *Unzip = N0.getNode();
11121  EVT VT = N->getValueType(0);
11122 
11123  SmallVector<SDValue, 8> Ops;
11124  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
11125  TLI.getPointerTy(DAG.getDataLayout())));
11126  Ops.push_back(Unzip->getOperand(0));
11127  Ops.push_back(Unzip->getOperand(1));
11128 
11129  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
11130 }
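
// [Editorial note, not part of the original LLVM source] Worked lane example
// for the fold above, for v4i16 inputs a = [a0 a1 a2 a3], b = [b0 b1 b2 b3]:
//   VUZP(a, b).0 = [a0 a2 b0 b2], VUZP(a, b).1 = [a1 a3 b1 b3]
//   ADD(VUZP.0, VUZP.1) = [a0+a1, a2+a3, b0+b1, b2+b3] = VPADD(a, b)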
11131 
11132 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
11133  TargetLowering::DAGCombinerInfo &DCI,
11134  const ARMSubtarget *Subtarget) {
11135  // Check for two extended operands.
11136  if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
11137  N1.getOpcode() == ISD::SIGN_EXTEND) &&
11138  !(N0.getOpcode() == ISD::ZERO_EXTEND &&
11139  N1.getOpcode() == ISD::ZERO_EXTEND))
11140  return SDValue();
11141 
11142  SDValue N00 = N0.getOperand(0);
11143  SDValue N10 = N1.getOperand(0);
11144 
11145  // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
11146  if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
11147  N00 == N10)
11148  return SDValue();
11149 
11150  // We only recognize Q register paddl here; this can't be reached until
11151  // after type legalization.
11152  if (!N00.getValueType().is64BitVector() ||
11153  !N0.getValueType().is128BitVector())
11154  return SDValue();
11155 
11156  // Generate vpaddl.
11157  SelectionDAG &DAG = DCI.DAG;
11158  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11159  SDLoc dl(N);
11160  EVT VT = N->getValueType(0);
11161 
11163  // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
11164  unsigned Opcode;
11165  if (N0.getOpcode() == ISD::SIGN_EXTEND)
11166  Opcode = Intrinsic::arm_neon_vpaddls;
11167  else
11168  Opcode = Intrinsic::arm_neon_vpaddlu;
11169  Ops.push_back(DAG.getConstant(Opcode, dl,
11170  TLI.getPointerTy(DAG.getDataLayout())));
11171  EVT ElemTy = N00.getValueType().getVectorElementType();
11172  unsigned NumElts = VT.getVectorNumElements();
11173  EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
11174  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
11175  N00.getOperand(0), N00.getOperand(1));
11176  Ops.push_back(Concat);
11177 
11178  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
11179 }
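
// [Editorial note, not part of the original LLVM source] Worked lane example
// for the fold above, for v4i16 unzip halves sign-extended to v4i32:
//   ADD(SEXT(VUZP(a, b).0), SEXT(VUZP(a, b).1))
//     = [a0+a1, a2+a3, b0+b1, b2+b3] as i32 lanes
//     = VPADDL.S16(CONCAT(a, b)), which is exactly the node built above.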
11180 
11181 // FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
11182 // an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
11183 // much easier to match.
11184 static SDValue
11185 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
11186  TargetLowering::DAGCombinerInfo &DCI,
11187  const ARMSubtarget *Subtarget) {
11188  // Only perform optimization if after legalize, and if NEON is available. We
11189  // also expect both operands to be BUILD_VECTORs.
11190  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
11191  || N0.getOpcode() != ISD::BUILD_VECTOR
11192  || N1.getOpcode() != ISD::BUILD_VECTOR)
11193  return SDValue();
11194 
11195  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
11196  EVT VT = N->getValueType(0);
11197  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
11198  return SDValue();
11199 
11200  // Check that the vector operands are of the right form.
11201  // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR
11202  // operands, where N is the size of the formed vector.
11203  // Each EXTRACT_VECTOR should have the same input vector and an odd or even
11204  // index such that we have a pairwise add pattern.
11205 
11206  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
11207  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11208  return SDValue();
11209  SDValue Vec = N0->getOperand(0)->getOperand(0);
11210  SDNode *V = Vec.getNode();
11211  unsigned nextIndex = 0;
11212 
11213  // For each operand of the ADD that is a BUILD_VECTOR,
11214  // check to see whether each of its operands is an EXTRACT_VECTOR with
11215  // the same vector and the appropriate index.
11216  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
11217  if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
11218  && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
11219 
11220  SDValue ExtVec0 = N0->getOperand(i);
11221  SDValue ExtVec1 = N1->getOperand(i);
11222 
11223  // First operand is the vector; verify it's the same.
11224  if (V != ExtVec0->getOperand(0).getNode() ||
11225  V != ExtVec1->getOperand(0).getNode())
11226  return SDValue();
11227 
11228  // Second is the constant; verify it's correct.
11229  ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
11230  ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
11231 
11232  // For the constant, we want to see all the even or all the odd.
11233  if (!C0 || !C1 || C0->getZExtValue() != nextIndex
11234  || C1->getZExtValue() != nextIndex+1)
11235  return SDValue();
11236 
11237  // Increment index.
11238  nextIndex+=2;
11239  } else
11240  return SDValue();
11241  }
11242 
11243  // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
11244  // we're using the entire input vector, otherwise there's a size/legality
11245  // mismatch somewhere.
11246  if (nextIndex != Vec.getValueType().getVectorNumElements() ||
11247  VT.getVectorNumElements() != nextIndex*2)
11248  return SDValue();
11249 
11250  // Create VPADDL node.
11251  SelectionDAG &DAG = DCI.DAG;
11252  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11253 
11254  SDLoc dl(N);
11255 
11256  // Build operand list.
11257  SmallVector<SDValue, 8> Ops;
11258  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
11259  TLI.getPointerTy(DAG.getDataLayout())));
11260 
11261  // Input is the vector.
11262  Ops.push_back(Vec);
11263 
11264  // Get widened type and narrowed type.
11265  MVT widenType;
11266  unsigned numElem = VT.getVectorNumElements();
11267 
11268  EVT inputLaneType = Vec.getValueType().getVectorElementType();
11269  switch (inputLaneType.getSimpleVT().SimpleTy) {
11270  case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
11271  case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
11272  case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
11273  default:
11274  llvm_unreachable("Invalid vector element type for padd optimization.");
11275  }
11276 
11277  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
11278  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
11279  return DAG.getNode(ExtOp, dl, VT, tmp);
11280 }
11281 
11282 static SDValue findMUL_LOHI(SDValue V) {
11283  if (V->getOpcode() == ISD::UMUL_LOHI ||
11284  V->getOpcode() == ISD::SMUL_LOHI)
11285  return V;
11286  return SDValue();
11287 }
11288 
11289 static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
11290  TargetLowering::DAGCombinerInfo &DCI,
11291  const ARMSubtarget *Subtarget) {
11292  if (!Subtarget->hasBaseDSP())
11293  return SDValue();
11294 
11295  // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
11296  // accumulates the product into a 64-bit value. The 16-bit values will
11297  // be sign extended somehow or SRA'd into 32-bit values
11298  // (addc (adde (mul 16bit, 16bit), lo), hi)
11299  SDValue Mul = AddcNode->getOperand(0);
11300  SDValue Lo = AddcNode->getOperand(1);
11301  if (Mul.getOpcode() != ISD::MUL) {
11302  Lo = AddcNode->getOperand(0);
11303  Mul = AddcNode->getOperand(1);
11304  if (Mul.getOpcode() != ISD::MUL)
11305  return SDValue();
11306  }
11307 
11308  SDValue SRA = AddeNode->getOperand(0);
11309  SDValue Hi = AddeNode->getOperand(1);
11310  if (SRA.getOpcode() != ISD::SRA) {
11311  SRA = AddeNode->getOperand(1);
11312  Hi = AddeNode->getOperand(0);
11313  if (SRA.getOpcode() != ISD::SRA)
11314  return SDValue();
11315  }
11316  if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
11317  if (Const->getZExtValue() != 31)
11318  return SDValue();
11319  } else
11320  return SDValue();
11321 
11322  if (SRA.getOperand(0) != Mul)
11323  return SDValue();
11324 
11325  SelectionDAG &DAG = DCI.DAG;
11326  SDLoc dl(AddcNode);
11327  unsigned Opcode = 0;
11328  SDValue Op0;
11329  SDValue Op1;
11330 
11331  if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
11332  Opcode = ARMISD::SMLALBB;
11333  Op0 = Mul.getOperand(0);
11334  Op1 = Mul.getOperand(1);
11335  } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
11336  Opcode = ARMISD::SMLALBT;
11337  Op0 = Mul.getOperand(0);
11338  Op1 = Mul.getOperand(1).getOperand(0);
11339  } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
11340  Opcode = ARMISD::SMLALTB;
11341  Op0 = Mul.getOperand(0).getOperand(0);
11342  Op1 = Mul.getOperand(1);
11343  } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
11344  Opcode = ARMISD::SMLALTT;
11345  Op0 = Mul->getOperand(0).getOperand(0);
11346  Op1 = Mul->getOperand(1).getOperand(0);
11347  }
11348 
11349  if (!Op0 || !Op1)
11350  return SDValue();
11351 
11352  SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
11353  Op0, Op1, Lo, Hi);
11354  // Replace the ADD nodes' uses with the MLA node's values.
11355  SDValue HiMLALResult(SMLAL.getNode(), 1);
11356  SDValue LoMLALResult(SMLAL.getNode(), 0);
11357 
11358  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
11359  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
11360 
11361  // Return original node to notify the driver to stop replacing.
11362  SDValue resNode(AddcNode, 0);
11363  return resNode;
11364 }
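
// [Editorial sketch, not part of the original LLVM source] Scalar model of the
// SMLALBB form matched above; the helper name is illustrative only. It is a
// signed 16 x 16 -> 32 multiply accumulated into the 64-bit value split
// across Lo/Hi.
static inline int64_t smlalbbModel(int16_t A, int16_t B, uint32_t Lo, uint32_t Hi) {
  int64_t Acc = (int64_t)(((uint64_t)Hi << 32) | Lo); // reassemble accumulator
  return Acc + (int32_t)A * (int32_t)B;               // add the signed product
}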
11365 
11366 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
11367  TargetLowering::DAGCombinerInfo &DCI,
11368  const ARMSubtarget *Subtarget) {
11369  // Look for multiply add opportunities.
11370  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
11371  // each add node consumes a value from ISD::UMUL_LOHI and there is
11372  // a glue link from the first add to the second add.
11373  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
11374  // a S/UMLAL instruction.
11375  //                  UMUL_LOHI
11376  //                 / :lo    \ :hi
11377  //                V          \          [no multiline comment]
11378  //    loAdd ->  ADDC         |
11379  //                 \ :carry /
11380  //                  V      V
11381  //                    ADDE <- hiAdd
11382  //
11383  // In the special case where only the higher part of a signed result is used
11384  // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
11385  // a constant with the exact value of 0x80000000, we recognize we are dealing
11386  // with a "rounded multiply and add" (or subtract) and transform it into
11387  // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.
11388 
11389  assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
11390  AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
11391  "Expect an ADDE or SUBE");
11392 
11393  assert(AddeSubeNode->getNumOperands() == 3 &&
11394  AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
11395  "ADDE node has the wrong inputs");
11396 
11397  // Check that we are chained to the right ADDC or SUBC node.
11398  SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
11399  if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
11400  AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
11401  (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
11402  AddcSubcNode->getOpcode() != ARMISD::SUBC))
11403  return SDValue();
11404 
11405  SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
11406  SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
11407 
11408  // Check if the two operands are from the same mul_lohi node.
11409  if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
11410  return SDValue();
11411 
11412  assert(AddcSubcNode->getNumValues() == 2 &&
11413  AddcSubcNode->getValueType(0) == MVT::i32 &&
11414  "Expect ADDC with two result values. First: i32");
11415 
11416  // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
11417  // may be an SMLAL which multiplies two 16-bit values.
11418  if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
11419  AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
11420  AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
11421  AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
11422  AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
11423  return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
11424 
11425  // Check for the triangle shape.
11426  SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
11427  SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
11428 
11429  // Make sure that the ADDE/SUBE operands are not coming from the same node.
11430  if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
11431  return SDValue();
11432 
11433  // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
11434  bool IsLeftOperandMUL = false;
11435  SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
11436  if (MULOp == SDValue())
11437  MULOp = findMUL_LOHI(AddeSubeOp1);
11438  else
11439  IsLeftOperandMUL = true;
11440  if (MULOp == SDValue())
11441  return SDValue();
11442 
11443  // Figure out the right opcode.
11444  unsigned Opc = MULOp->getOpcode();
11445  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
11446 
11447  // Figure out the high and low input values to the MLAL node.
11448  SDValue *HiAddSub = nullptr;
11449  SDValue *LoMul = nullptr;
11450  SDValue *LowAddSub = nullptr;
11451 
11452  // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
11453  if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
11454  return SDValue();
11455 
11456  if (IsLeftOperandMUL)
11457  HiAddSub = &AddeSubeOp1;
11458  else
11459  HiAddSub = &AddeSubeOp0;
11460 
11461  // Ensure that LoMul and LowAddSub are taken from the correct ISD::SMUL_LOHI node
11462  // whose low result is fed to the ADDC/SUBC we are checking.
11463 
11464  if (AddcSubcOp0 == MULOp.getValue(0)) {
11465  LoMul = &AddcSubcOp0;
11466  LowAddSub = &AddcSubcOp1;
11467  }
11468  if (AddcSubcOp1 == MULOp.getValue(0)) {
11469  LoMul = &AddcSubcOp1;
11470  LowAddSub = &AddcSubcOp0;
11471  }
11472 
11473  if (!LoMul)
11474  return SDValue();
11475 
11476  // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
11477  // the replacement below will create a cycle.
11478  if (AddcSubcNode == HiAddSub->getNode() ||
11479  AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
11480  return SDValue();
11481 
11482  // Create the merged node.
11483  SelectionDAG &DAG = DCI.DAG;
11484 
11485  // Start building operand list.
11486  SmallVector<SDValue, 8> Ops;
11487  Ops.push_back(LoMul->getOperand(0));
11488  Ops.push_back(LoMul->getOperand(1));
11489 
11490  // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
11491  // the case, we must be doing signed multiplication and only use the higher
11492  // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
11493  // addition or subtraction with the value of 0x80000000.
11494  if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
11495  FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
11496  LowAddSub->getNode()->getOpcode() == ISD::Constant &&
11497  static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
11498  0x80000000) {
11499  Ops.push_back(*HiAddSub);
11500  if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
11501  FinalOpc = ARMISD::SMMLSR;
11502  } else {
11503  FinalOpc = ARMISD::SMMLAR;
11504  }
11505  SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
11506  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
11507 
11508  return SDValue(AddeSubeNode, 0);
11509  } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
11510  // SMMLS is generated during instruction selection and the rest of this
11511  // function cannot handle the case where AddcSubcNode is a SUBC.
11512  return SDValue();
11513 
11514  // Finish building the operand list for {U/S}MLAL
11515  Ops.push_back(*LowAddSub);
11516  Ops.push_back(*HiAddSub);
11517 
11518  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
11519  DAG.getVTList(MVT::i32, MVT::i32), Ops);
11520 
11521  // Replace the ADD nodes' uses with the MLA node's values.
11522  SDValue HiMLALResult(MLALNode.getNode(), 1);
11523  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
11524 
11525  SDValue LoMLALResult(MLALNode.getNode(), 0);
11526  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
11527 
11528  // Return original node to notify the driver to stop replacing.
11529  return SDValue(AddeSubeNode, 0);
11530 }
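
// [Editorial sketch, not part of the original LLVM source] Scalar model of the
// UMLAL form produced above; the helper name is illustrative only. The
// UMUL_LOHI plus the ADDC/ADDE pair collapse into one 64-bit
// multiply-accumulate whose low half feeds ADDC and high half feeds ADDE.
static inline uint64_t umlalModel(uint32_t A, uint32_t B, uint32_t Lo, uint32_t Hi) {
  uint64_t Acc = ((uint64_t)Hi << 32) | Lo;
  return Acc + (uint64_t)A * B;
}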
11531 
11532 static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
11533  TargetLowering::DAGCombinerInfo &DCI,
11534  const ARMSubtarget *Subtarget) {
11535  // UMAAL is similar to UMLAL except that it adds two unsigned values.
11536  // While trying to combine for the other MLAL nodes, first search for the
11537  // chance to use UMAAL. Check if Addc uses a node which has already
11538  // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
11539  // as the addend, and it's handled in PerformUMLALCombine.
11540 
11541  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
11542  return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
11543 
11544  // Check that we have a glued ADDC node.
11545  SDNode* AddcNode = AddeNode->getOperand(2).getNode();
11546  if (AddcNode->getOpcode() != ARMISD::ADDC)
11547  return SDValue();
11548 
11549  // Find the converted UMAAL or quit if it doesn't exist.
11550  SDNode *UmlalNode = nullptr;
11551  SDValue AddHi;
11552  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
11553  UmlalNode = AddcNode->getOperand(0).getNode();
11554  AddHi = AddcNode->getOperand(1);
11555  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
11556  UmlalNode = AddcNode->getOperand(1).getNode();
11557  AddHi = AddcNode->getOperand(0);
11558  } else {
11559  return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
11560  }
11561 
11562  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
11563  // the ADDC as well as Zero.
11564  if (!isNullConstant(UmlalNode->getOperand(3)))
11565  return SDValue();
11566 
11567  if ((isNullConstant(AddeNode->getOperand(0)) &&
11568  AddeNode->getOperand(1).getNode() == UmlalNode) ||
11569  (AddeNode->getOperand(0).getNode() == UmlalNode &&
11570  isNullConstant(AddeNode->getOperand(1)))) {
11571  SelectionDAG &DAG = DCI.DAG;
11572  SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
11573  UmlalNode->getOperand(2), AddHi };
11574  SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
11575  DAG.getVTList(MVT::i32, MVT::i32), Ops);
11576 
11577  // Replace the ADD nodes' uses with the UMAAL node's values.
11578  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
11579  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
11580 
11581  // Return original node to notify the driver to stop replacing.
11582  return SDValue(AddeNode, 0);
11583  }
11584  return SDValue();
11585 }
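
// [Editorial sketch, not part of the original LLVM source] Scalar model of
// UMAAL as formed above; the helper name is illustrative only. An unsigned
// 32 x 32 product plus two independent 32-bit addends cannot overflow 64 bits:
// (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1 exactly.
static inline uint64_t umaalModel(uint32_t A, uint32_t B, uint32_t C, uint32_t D) {
  return (uint64_t)A * B + C + D;
}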
11586 
11587 static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
11588  const ARMSubtarget *Subtarget) {
11589  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
11590  return SDValue();
11591 
11592  // Check that we have a pair of ADDC and ADDE as operands.
11593  // Both addends of the ADDE must be zero.
11594  SDNode* AddcNode = N->getOperand(2).getNode();
11595  SDNode* AddeNode = N->getOperand(3).getNode();
11596  if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
11597  (AddeNode->getOpcode() == ARMISD::ADDE) &&
11598  isNullConstant(AddeNode->getOperand(0)) &&
11599  isNullConstant(AddeNode->getOperand(1)) &&
11600  (AddeNode->getOperand(2).getNode() == AddcNode))
11601  return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
11602  DAG.getVTList(MVT::i32, MVT::i32),
11603  {N->getOperand(0), N->getOperand(1),
11604  AddcNode->getOperand(0), AddcNode->getOperand(1)});
11605  else
11606  return SDValue();
11607 }
11608 
11609 static SDValue PerformAddcSubcCombine(SDNode *N,
11610  TargetLowering::DAGCombinerInfo &DCI,
11611  const ARMSubtarget *Subtarget) {
11612  SelectionDAG &DAG(DCI.DAG);
11613 
11614  if (N->getOpcode() == ARMISD::SUBC) {
11615  // (SUBC (ADDE 0, 0, C), 1) -> C
11616  SDValue LHS = N->getOperand(0);
11617  SDValue RHS = N->getOperand(1);
11618  if (LHS->getOpcode() == ARMISD::ADDE &&
11619  isNullConstant(LHS->getOperand(0)) &&
11620  isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
11621  return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
11622  }
11623  }
11624 
11625  if (Subtarget->isThumb1Only()) {
11626  SDValue RHS = N->getOperand(1);
11627  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
11628  int32_t imm = C->getSExtValue();
11629  if (imm < 0 && imm > std::numeric_limits<int>::min()) {
11630  SDLoc DL(N);
11631  RHS = DAG.getConstant(-imm, DL, MVT::i32);
11632  unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
11633  : ARMISD::ADDC;
11634  return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
11635  }
11636  }
11637  }
11638 
11639  return SDValue();
11640 }
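
// [Editorial sketch, not part of the original LLVM source] The Thumb1 rewrite
// above in plain arithmetic; the helper name is illustrative only. An ADDC of
// a negative immediate becomes a SUBC of its negation, so only a small
// positive constant needs materializing; ARM subtraction sets carry as the
// addition of the two's complement would, so the flag result is preserved.
static inline uint32_t addcNegImmModel(uint32_t X, int32_t Imm) {
  // Assumes INT_MIN < Imm < 0, as the combine above checks before negating.
  return X - (uint32_t)(-Imm); // == X + Imm (mod 2^32)
}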
11641 
11642 static SDValue PerformAddeSubeCombine(SDNode *N,
11643  TargetLowering::DAGCombinerInfo &DCI,
11644  const ARMSubtarget *Subtarget) {
11645  if (Subtarget->isThumb1Only()) {
11646  SelectionDAG &DAG = DCI.DAG;
11647  SDValue RHS = N->getOperand(1);
11648  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
11649  int64_t imm = C->getSExtValue();
11650  if (imm < 0) {
11651  SDLoc DL(N);
11652 
11653  // The with-carry-in form matches bitwise not instead of the negation.
11654  // Effectively, the inverse interpretation of the carry flag already
11655  // accounts for part of the negation.
11656  RHS = DAG.getConstant(~imm, DL, MVT::i32);
11657 
11658  unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
11659  : ARMISD::ADDE;
11660  return DAG.getNode(Opcode, DL, N->getVTList(),
11661  N->getOperand(0), RHS, N->getOperand(2));
11662  }
11663  }
11664  } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
11665  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
11666  }
11667  return SDValue();
11668 }
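
// [Editorial sketch, not part of the original LLVM source] The identity behind
// the with-carry rewrite above; the helper name is illustrative only. Since
// X - Imm == X + ~Imm + 1 (mod 2^32), an ADDE of a negative immediate becomes
// a SUBE of the bitwise complement, with the carry supplying the trailing +1.
static inline uint32_t subViaNotModel(uint32_t X, uint32_t Imm, uint32_t CarryIn) {
  return X + ~Imm + CarryIn; // with CarryIn == 1 this equals X - Imm
}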
11669 
11670 static SDValue PerformABSCombine(SDNode *N,
11671  TargetLowering::DAGCombinerInfo &DCI,
11672  const ARMSubtarget *Subtarget) {
11673  SDValue res;
11674  SelectionDAG &DAG = DCI.DAG;
11675  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11676 
11677  if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
11678  return SDValue();
11679 
11680  if (!TLI.expandABS(N, res, DAG))
11681  return SDValue();
11682 
11683  return res;
11684 }
11685 
11686 /// PerformADDECombine - Target-specific dag combine transform from
11687 /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
11688 /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
11689 static SDValue PerformADDECombine(SDNode *N,
11690  TargetLowering::DAGCombinerInfo &DCI,
11691  const ARMSubtarget *Subtarget) {
11692  // Only ARM and Thumb2 support UMLAL/SMLAL.
11693  if (Subtarget->isThumb1Only())
11694  return PerformAddeSubeCombine(N, DCI, Subtarget);
11695 
11696  // Only perform the checks after legalize when the pattern is available.
11697  if (DCI.isBeforeLegalize()) return SDValue();
11698 
11699  return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
11700 }
11701 
11702 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
11703 /// operands N0 and N1. This is a helper for PerformADDCombine that is
11704 /// called with the default operands, and if that fails, with commuted
11705 /// operands.
11706 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
11707  TargetLowering::DAGCombinerInfo &DCI,
11708  const ARMSubtarget *Subtarget) {
11709  // Attempt to create vpadd for this add.
11710  if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
11711  return Result;
11712 
11713  // Attempt to create vpaddl for this add.
11714  if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
11715  return Result;
11716  if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
11717  Subtarget))
11718  return Result;
11719 
11720  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
11721  if (N0.getNode()->hasOneUse())
11722  if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
11723  return Result;
11724  return SDValue();
11725 }
11726 
11727 bool
11728 ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
11729  CombineLevel Level) const {
11730  if (Level == BeforeLegalizeTypes)
11731  return true;
11732 
11733  if (N->getOpcode() != ISD::SHL)
11734  return true;
11735 
11736  if (Subtarget->isThumb1Only()) {
11737  // Avoid making expensive immediates by commuting shifts. (This logic
11738  // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
11739  // for free.)
11740  if (N->getOpcode() != ISD::SHL)
11741  return true;
11742  SDValue N1 = N->getOperand(0);
11743  if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
11744  N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
11745  return true;
11746  if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
11747  if (Const->getAPIntValue().ult(256))
11748  return false;
11749  if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
11750  Const->getAPIntValue().sgt(-256))
11751  return false;
11752  }
11753  return true;
11754  }
11755 
11756  // Turn off commute-with-shift transform after legalization, so it doesn't
11757  // conflict with PerformSHLSimplify. (We could try to detect when
11758  // PerformSHLSimplify would trigger more precisely, but it isn't
11759  // really necessary.)
11760  return false;
11761 }
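
// [Editorial note, not part of the original LLVM source] Concrete instance of
// the Thumb1 heuristic above: for (shl (add x, 5), 8) the hook returns false
// and the add-then-shift form is kept, because 5 encodes as an 8-bit
// immediate while the commuted constant 5 << 8 == 1280 would need extra
// instructions to materialize; a constant that is already expensive loses
// nothing by being commuted, so the hook returns true for it.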
11762 
11763 bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
11764  const SDNode *N, CombineLevel Level) const {
11765  if (!Subtarget->isThumb1Only())
11766  return true;
11767 
11768  if (Level == BeforeLegalizeTypes)
11769  return true;
11770 
11771  return false;
11772 }
11773 
11774 bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
11775  if (!Subtarget->hasNEON()) {
11776  if (Subtarget->isThumb1Only())
11777  return VT.getScalarSizeInBits() <= 32;
11778  return true;
11779  }
11780  return VT.isScalarInteger();
11781 }
11782 
11783 static SDValue PerformSHLSimplify(SDNode *N,
11784  TargetLowering::DAGCombinerInfo &DCI,
11785  const ARMSubtarget *ST) {
11786  // Allow the generic combiner to identify potential bswaps.
11787  if (DCI.isBeforeLegalize())
11788  return SDValue();
11789 
11790  // DAG combiner will fold:
11791  // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
11792  // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
11793  // Other code patterns that can also be modified have the following form:
11794  // b + ((a << 1) | 510)
11795  // b + ((a << 1) & 510)
11796  // b + ((a << 1) ^ 510)
11797  // b + ((a << 1) + 510)
11798 
11799  // Many instructions can perform the shift for free, but it requires both
11800  // operands to be registers. If c1 << c2 is too large, a mov immediate
11801  // instruction will be needed. So, unfold back to the original pattern if:
11802  // - c1 and c2 are small enough that they don't require mov imms.
11803  // - the user(s) of the node can perform a shl
11804 
11805  // No shifted operands for 16-bit instructions.
11806  if (ST->isThumb() && ST->isThumb1Only())
11807  return SDValue();
11808 
11809  // Check that all the users could perform the shl themselves.
11810  for (auto U : N->uses()) {
11811  switch(U->getOpcode()) {
11812  default:
11813  return SDValue();
11814  case ISD::SUB:
11815  case ISD::ADD:
11816  case ISD::AND:
11817  case ISD::OR:
11818  case ISD::XOR:
11819  case ISD::SETCC:
11820  case ARMISD::CMP:
11821  // Check that the user isn't already using a constant because there
11822  // aren't any instructions that support an immediate operand and a
11823  // shifted operand.
11824  if (isa<ConstantSDNode>(U->getOperand(0)) ||
11825  isa<ConstantSDNode>(U->getOperand(1)))
11826  return SDValue();
11827 
11828  // Check that it's not already using a shift.
11829  if (U->getOperand(0).getOpcode() == ISD::SHL ||
11830  U->getOperand(1).getOpcode() == ISD::SHL)
11831  return SDValue();
11832  break;
11833  }
11834  }
11835 
11836  if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
11837  N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
11838  return SDValue();
11839 
11840  if (N->getOperand(0).getOpcode() != ISD::SHL)
11841  return SDValue();
11842 
11843  SDValue SHL = N->getOperand(0);
11844 
11845  auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
11846  auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
11847  if (!C1ShlC2 || !C2)
11848  return SDValue();
11849 
11850  APInt C2Int = C2->getAPIntValue();
11851  APInt C1Int = C1ShlC2->getAPIntValue();
11852 
11853  // Check that performing a lshr will not lose any information.
11854  APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(),
11855  C2Int.getBitWidth() - C2->getZExtValue());
11856  if ((C1Int & Mask) != C1Int)
11857  return SDValue();
11858 
11859  // Shift the first constant.
11860  C1Int.lshrInPlace(C2Int);
11861 
11862  // The immediates are encoded as an 8-bit value that can be rotated.
11863  auto LargeImm = [](const APInt &Imm) {
11864  unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
11865  return Imm.getBitWidth() - Zeros > 8;
11866  };
11867 
11868  if (LargeImm(C1Int) || LargeImm(C2Int))
11869  return SDValue();
11870 
11871  SelectionDAG &DAG = DCI.DAG;
11872  SDLoc dl(N);
11873  SDValue X = SHL.getOperand(0);
11874  SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
11875  DAG.getConstant(C1Int, dl, MVT::i32));
11876  // Shift left to compensate for the lshr of C1Int.
11877  SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
11878 
11879  LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
11880  SHL.dump(); N->dump());
11881  LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
11882  return Res;
11883 }
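
// [Editorial sketch, not part of the original LLVM source] Worked instance of
// the unfold above; the helper name is illustrative only. (or (shl a, 1), 510)
// becomes (shl (or a, 255), 1): 510 == 255 << 1 and 255 fits the 8-bit
// rotated-immediate encoding, and the final shift can then fold into the
// user's shifted-operand form for free.
static inline uint32_t shlSimplifyModel(uint32_t A) {
  return (A | 255u) << 1; // == (A << 1) | 510
}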
11884 
11885 
11886 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
11887 ///
11888 static SDValue PerformADDCombine(SDNode *N,
11889  TargetLowering::DAGCombinerInfo &DCI,
11890  const ARMSubtarget *Subtarget) {
11891  SDValue N0 = N->getOperand(0);
11892  SDValue N1 = N->getOperand(1);
11893 
11894  // Only works one way, because it needs an immediate operand.
11895  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
11896  return Result;
11897 
11898  // First try with the default operand order.
11899  if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
11900  return Result;
11901 
11902  // If that didn't work, try again with the operands commuted.
11903  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
11904 }
11905 
11906 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
11907 ///
11908 static SDValue PerformSUBCombine(SDNode *N,
11909  TargetLowering::DAGCombinerInfo &DCI,
11910  const ARMSubtarget *Subtarget) {
11911  SDValue N0 = N->getOperand(0);
11912  SDValue N1 = N->getOperand(1);
11913 
11914  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
11915  if (N1.getNode()->hasOneUse())
11916  if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
11917  return Result;
11918 
11919  if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
11920  return SDValue();
11921 
11922  // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
11923  // so that we can readily pattern match more MVE instructions which can use
11924  // a scalar operand.
11925  SDValue VDup = N->getOperand(1);
11926  if (VDup->getOpcode() != ARMISD::VDUP)
11927  return SDValue();
11928 
11929  SDValue VMov = N->getOperand(0);
11930  if (VMov->getOpcode() == ISD::BITCAST)
11931  VMov = VMov->getOperand(0);
11932 
11933  if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
11934  return SDValue();
11935 
11936  SDLoc dl(N);
11937  SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
11938  DCI.DAG.getConstant(0, dl, MVT::i32),
11939  VDup->getOperand(0));
11940  return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
11941 }
11942 
11943 /// PerformVMULCombine
11944 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
11945 /// special multiplier accumulator forwarding.
11946 /// vmul d3, d0, d2
11947 /// vmla d3, d1, d2
11948 /// is faster than
11949 /// vadd d3, d0, d1
11950 /// vmul d3, d3, d2
11951 // However, for (A + B) * (A + B),
11952 // vadd d2, d0, d1
11953 // vmul d3, d0, d2
11954 // vmla d3, d1, d2
11955 // is slower than
11956 // vadd d2, d0, d1
11957 // vmul d3, d2, d2
11958 static SDValue PerformVMULCombine(SDNode *N,
11959  TargetLowering::DAGCombinerInfo &DCI,
11960  const ARMSubtarget *Subtarget) {
11961  if (!Subtarget->hasVMLxForwarding())
11962  return SDValue();
11963 
11964  SelectionDAG &DAG = DCI.DAG;
11965  SDValue N0 = N->getOperand(0);
11966  SDValue N1 = N->getOperand(1);
11967  unsigned Opcode = N0.getOpcode();
11968  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
11969  Opcode != ISD::FADD && Opcode != ISD::FSUB) {
11970  Opcode = N1.getOpcode();
11971  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
11972  Opcode != ISD::FADD && Opcode != ISD::FSUB)
11973  return SDValue();
11974  std::swap(N0, N1);
11975  }
11976 
11977  if (N0 == N1)
11978  return SDValue();
11979 
11980  EVT VT = N->getValueType(0);
11981  SDLoc DL(N);
11982  SDValue N00 = N0->getOperand(0);
11983  SDValue N01 = N0->getOperand(1);
11984  return DAG.getNode(Opcode, DL, VT,
11985  DAG.getNode(ISD::MUL, DL, VT, N00, N1),
11986  DAG.getNode(ISD::MUL, DL, VT, N01, N1));
11987 }
11988 
11991  const ARMSubtarget *Subtarget) {
11992  SelectionDAG &DAG = DCI.DAG;
11993 
11994  if (Subtarget->isThumb1Only())
11995  return SDValue();
11996 
11997  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
11998  return SDValue();
11999 
12000  EVT VT = N->getValueType(0);
12001  if (VT.is64BitVector() || VT.is128BitVector())
12002  return PerformVMULCombine(N, DCI, Subtarget);
12003  if (VT != MVT::i32)
12004  return SDValue();
12005 
12007  if (!C)
12008  return SDValue();
12009 
12010  int64_t MulAmt = C->getSExtValue();
12011  unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
12012 
12013  ShiftAmt = ShiftAmt & (32 - 1);
12014  SDValue V = N->getOperand(0);
12015  SDLoc DL(N);
12016 
12017  SDValue Res;
12018  MulAmt >>= ShiftAmt;
12019 
12020  if (MulAmt >= 0) {
12021  if (isPowerOf2_32(MulAmt - 1)) {
12022  // (mul x, 2^N + 1) => (add (shl x, N), x)
12023  Res = DAG.getNode(ISD::ADD, DL, VT,
12024  V,
12025  DAG.getNode(ISD::SHL, DL, VT,
12026  V,
12027  DAG.getConstant(Log2_32(MulAmt - 1), DL,
12028  MVT::i32)));
12029  } else if (isPowerOf2_32(MulAmt + 1)) {
12030  // (mul x, 2^N - 1) => (sub (shl x, N), x)
12031  Res = DAG.getNode(ISD::SUB, DL, VT,
12032  DAG.getNode(ISD::SHL, DL, VT,
12033  V,
12034  DAG.getConstant(Log2_32(MulAmt + 1), DL,
12035  MVT::i32)),
12036  V);
12037  } else
12038  return SDValue();
12039  } else {
12040  uint64_t MulAmtAbs = -MulAmt;
12041  if (isPowerOf2_32(MulAmtAbs + 1)) {
12042  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
12043  Res = DAG.getNode(ISD::SUB, DL, VT,
12044  V,
12045  DAG.getNode(ISD::SHL, DL, VT,
12046  V,
12047  DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
12048  MVT::i32)));
12049  } else if (isPowerOf2_32(MulAmtAbs - 1)) {
12050  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
12051  Res = DAG.getNode(ISD::ADD, DL, VT,
12052  V,
12053  DAG.getNode(ISD::SHL, DL, VT,
12054  V,
12055  DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
12056  MVT::i32)));
12057  Res = DAG.getNode(ISD::SUB, DL, VT,
12058  DAG.getConstant(0, DL, MVT::i32), Res);
12059  } else
12060  return SDValue();
12061  }
12062 
12063  if (ShiftAmt != 0)
12064  Res = DAG.getNode(ISD::SHL, DL, VT,
12065  Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
12066 
12067  // Do not add new nodes to DAG combiner worklist.
12068  DCI.CombineTo(N, Res, false);
12069  return SDValue();
12070 }
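
// [Editorial sketch, not part of the original LLVM source] Scalar instances of
// the power-of-two decompositions above; the helper names are illustrative.
static inline uint32_t mulBy9Model(uint32_t X) {
  return (X << 3) + X; // (mul x, 2^3 + 1) => (add (shl x, 3), x)
}
static inline uint32_t mulByMinus7Model(uint32_t X) {
  return X - (X << 3); // (mul x, -(2^3 - 1)) => (sub x, (shl x, 3))
}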
12071 
12072 static SDValue CombineANDShift(SDNode *N,
12073  TargetLowering::DAGCombinerInfo &DCI,
12074  const ARMSubtarget *Subtarget) {
12075  // Allow DAGCombine to pattern-match before we touch the canonical form.
12076  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
12077  return SDValue();
12078 
12079  if (N->getValueType(0) != MVT::i32)
12080  return SDValue();
12081 
12082  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12083  if (!N1C)
12084  return SDValue();
12085 
12086  uint32_t C1 = (uint32_t)N1C->getZExtValue();
12087  // Don't transform uxtb/uxth.
12088  if (C1 == 255 || C1 == 65535)
12089  return SDValue();
12090 
12091  SDNode *N0 = N->getOperand(0).getNode();
12092  if (!N0->hasOneUse())
12093  return SDValue();
12094 
12095  if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
12096  return SDValue();
12097 
12098  bool LeftShift = N0->getOpcode() == ISD::SHL;
12099 
12100  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12101  if (!N01C)
12102  return SDValue();
12103 
12104  uint32_t C2 = (uint32_t)N01C->getZExtValue();
12105  if (!C2 || C2 >= 32)
12106  return SDValue();
12107 
12108  // Clear irrelevant bits in the mask.
12109  if (LeftShift)
12110  C1 &= (-1U << C2);
12111  else
12112  C1 &= (-1U >> C2);
12113 
12114  SelectionDAG &DAG = DCI.DAG;
12115  SDLoc DL(N);
12116 
12117  // We have a pattern of the form "(and (shl x, c2) c1)" or
12118  // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
12119  // transform to a pair of shifts, to save materializing c1.
12120 
12121  // First pattern: right shift, then mask off leading bits.
12122  // FIXME: Use demanded bits?
12123  if (!LeftShift && isMask_32(C1)) {
12124  uint32_t C3 = countLeadingZeros(C1);
12125  if (C2 < C3) {
12126  SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
12127  DAG.getConstant(C3 - C2, DL, MVT::i32));
12128  return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
12129  DAG.getConstant(C3, DL, MVT::i32));
12130  }
12131  }
12132 
12133  // First pattern, reversed: left shift, then mask off trailing bits.
12134  if (LeftShift && isMask_32(~C1)) {
12135  uint32_t C3 = countTrailingZeros(C1);
12136  if (C2 < C3) {
12137  SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
12138  DAG.getConstant(C3 - C2, DL, MVT::i32));
12139  return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
12140  DAG.getConstant(C3, DL, MVT::i32));
12141  }
12142  }
12143 
12144  // Second pattern: left shift, then mask off leading bits.
12145  // FIXME: Use demanded bits?
12146  if (LeftShift && isShiftedMask_32(C1)) {
12147  uint32_t Trailing = countTrailingZeros(C1);
12148  uint32_t C3 = countLeadingZeros(C1);
12149  if (Trailing == C2 && C2 + C3 < 32) {
12150  SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
12151  DAG.getConstant(C2 + C3, DL, MVT::i32));
12152  return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
12153  DAG.getConstant(C3, DL, MVT::i32));
12154  }
12155  }
12156 
12157  // Second pattern, reversed: right shift, then mask off trailing bits.
12158  // FIXME: Handle other patterns of known/demanded bits.
12159  if (!LeftShift && isShiftedMask_32(C1)) {
12160  uint32_t Leading = countLeadingZeros(C1);
12161  uint32_t C3 = countTrailingZeros(C1);
12162  if (Leading == C2 && C2 + C3 < 32) {
12163  SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
12164  DAG.getConstant(C2 + C3, DL, MVT::i32));
12165  return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
12166  DAG.getConstant(C3, DL, MVT::i32));
12167  }
12168  }
12169 
12170  // FIXME: Transform "(and (shl x, c2) c1)" ->
12171  // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
12172  // c1.
12173  return SDValue();
12174 }
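
// [Editorial sketch, not part of the original LLVM source] Worked instance of
// the first pattern above; the helper name is illustrative only. For
// (and (srl x, 1), 0x00ffffff) we have C2 == 1 and C3 == 8 leading zeros, so
// the combine emits (srl (shl x, 7), 8): two register shifts instead of
// materializing the 0x00ffffff mask.
static inline uint32_t andShiftModel(uint32_t X) {
  return (X << 7) >> 8; // == (X >> 1) & 0x00ffffff
}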
12175 
12176 static SDValue PerformANDCombine(SDNode *N,
12177  TargetLowering::DAGCombinerInfo &DCI,
12178  const ARMSubtarget *Subtarget) {
12179  // Attempt to use immediate-form VBIC
12180  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
12181  SDLoc dl(N);
12182  EVT VT = N->getValueType(0);
12183  SelectionDAG &DAG = DCI.DAG;
12184 
12185  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
12186  return SDValue();
12187 
12188  APInt SplatBits, SplatUndef;
12189  unsigned SplatBitSize;
12190  bool HasAnyUndefs;
12191  if (BVN && Subtarget->hasNEON() &&
12192  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
12193  if (SplatBitSize <= 64) {
12194  EVT VbicVT;
12195  SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
12196  SplatUndef.getZExtValue(), SplatBitSize,
12197  DAG, dl, VbicVT, VT.is128BitVector(),
12198  OtherModImm);
12199  if (Val.getNode()) {
12200  SDValue Input =
12201  DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
12202  SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
12203  return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
12204  }
12205  }
12206  }
12207 
12208  if (!Subtarget->isThumb1Only()) {
12209  // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
12210  if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
12211  return Result;
12212 
12213  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
12214  return Result;
12215  }
12216 
12217  if (Subtarget->isThumb1Only())
12218  if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
12219  return Result;
12220 
12221  return SDValue();
12222 }
12223 
12224 // Try combining OR nodes to SMULWB, SMULWT.
12225 static SDValue PerformORCombineToSMULWBT(SDNode *OR,
12226  TargetLowering::DAGCombinerInfo &DCI,
12227  const ARMSubtarget *Subtarget) {
12228  if (!Subtarget->hasV6Ops() ||
12229  (Subtarget->isThumb() &&
12230  (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
12231  return SDValue();
12232 
12233  SDValue SRL = OR->getOperand(0);
12234  SDValue SHL = OR->getOperand(1);
12235 
12236  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
12237  SRL = OR->getOperand(1);
12238  SHL = OR->getOperand(0);
12239  }
12240  if (!isSRL16(SRL) || !isSHL16(SHL))
12241  return SDValue();
12242 
12243  // The first operands to the shifts need to be the two results from the
12244  // same smul_lohi node.
12245  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
12246  SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
12247  return SDValue();
12248 
12249  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
12250  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
12251  SHL.getOperand(0) != SDValue(SMULLOHI, 1))
12252  return SDValue();
12253 
12254  // Now we have:
12255  // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
12256  // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
12257  // For SMULWB the 16-bit value will be sign extended somehow.
12258  // For SMULWT only the SRA is required.
12259  // Check both sides of SMUL_LOHI
12260  SDValue OpS16 = SMULLOHI->getOperand(0);
12261  SDValue OpS32 = SMULLOHI->getOperand(1);
12262 
12263  SelectionDAG &DAG = DCI.DAG;
12264  if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
12265  OpS16 = OpS32;
12266  OpS32 = SMULLOHI->getOperand(0);
12267  }
12268 
12269  SDLoc dl(OR);
12270  unsigned Opcode = 0;
12271  if (isS16(OpS16, DAG))
12272  Opcode = ARMISD::SMULWB;
12273  else if (isSRA16(OpS16)) {
12274  Opcode = ARMISD::SMULWT;
12275  OpS16 = OpS16->getOperand(0);
12276  }
12277  else
12278  return SDValue();
12279 
12280  SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
12281  DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
12282  return SDValue(OR, 0);
12283 }
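
// [Editorial sketch, not part of the original LLVM source] Scalar model of the
// SMULWB form recognized above; the helper name is illustrative only. It is a
// signed 32 x 16 multiply keeping the top 32 bits of the 48-bit product.
static inline int32_t smulwbModel(int32_t A, int16_t B) {
  return (int32_t)(((int64_t)A * B) >> 16); // arithmetic shift assumed
}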
12284 
12285 static SDValue PerformORCombineToBFI(SDNode *N,
12286  TargetLowering::DAGCombinerInfo &DCI,
12287  const ARMSubtarget *Subtarget) {
12288  // BFI is only available on V6T2+
12289  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
12290  return SDValue();
12291 
12292  EVT VT = N->getValueType(0);
12293  SDValue N0 = N->getOperand(0);
12294  SDValue N1 = N->getOperand(1);
12295  SelectionDAG &DAG = DCI.DAG;
12296  SDLoc DL(N);
12297  // 1) or (and A, mask), val => ARMbfi A, val, mask
12298  // iff (val & ~mask) == val
12299  //
12300  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
12301  // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
12302  // && mask == ~mask2
12303  // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
12304  // && ~mask == mask2
12305  // (i.e., copy a bitfield value into another bitfield of the same width)
12306 
12307  if (VT != MVT::i32)
12308  return SDValue();
12309 
12310  SDValue N00 = N0.getOperand(0);
12311 
12312  // The value and the mask need to be constants so we can verify this is
12313  // actually a bitfield set. If the mask is 0xffff, we can do better
12314  // via a movt instruction, so don't use BFI in that case.
12315  SDValue MaskOp = N0.getOperand(1);
12316  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
12317  if (!MaskC)
12318  return SDValue();
12319  unsigned Mask = MaskC->getZExtValue();
12320  if (Mask == 0xffff)
12321  return SDValue();
12322  SDValue Res;
12323  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
12324  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
12325  if (N1C) {
12326  unsigned Val = N1C->getZExtValue();
12327  if ((Val & ~Mask) != Val)
12328  return SDValue();
12329 
12330  if (ARM::isBitFieldInvertedMask(Mask)) {
12331  Val >>= countTrailingZeros(~Mask);
12332 
12333  Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
12334  DAG.getConstant(Val, DL, MVT::i32),
12335  DAG.getConstant(Mask, DL, MVT::i32));
12336 
12337  DCI.CombineTo(N, Res, false);
12338  // Return value from the original node to inform the combiner that N is
12339  // now dead.
12340  return SDValue(N, 0);
12341  }
12342  } else if (N1.getOpcode() == ISD::AND) {
12343  // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
12344  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
12345  if (!N11C)
12346  return SDValue();
12347  unsigned Mask2 = N11C->getZExtValue();
12348 
12349  // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI
12350  // pattern to match as is.
12351  if (ARM::isBitFieldInvertedMask(Mask) &&
12352  (Mask == ~Mask2)) {
12353  // The pack halfword instruction works better for masks that fit it,
12354  // so use that when it's available.
12355  if (Subtarget->hasDSP() &&
12356  (Mask == 0xffff || Mask == 0xffff0000))
12357  return SDValue();
12358  // 2a
12359  unsigned amt = countTrailingZeros(Mask2);
12360  Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
12361  DAG.getConstant(amt, DL, MVT::i32));
12362  Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
12363  DAG.getConstant(Mask, DL, MVT::i32));
12364  DCI.CombineTo(N, Res, false);
12365  // Return value from the original node to inform the combiner that N is
12366  // now dead.
12367  return SDValue(N, 0);
12368  } else if (ARM::isBitFieldInvertedMask(~Mask) &&
12369  (~Mask == Mask2)) {
12370  // The pack halfword instruction works better for masks that fit it,
12371  // so use that when it's available.
12372  if (Subtarget->hasDSP() &&
12373  (Mask2 == 0xffff || Mask2 == 0xffff0000))
12374  return SDValue();
12375  // 2b
12376  unsigned lsb = countTrailingZeros(Mask);
12377  Res = DAG.getNode(ISD::SRL, DL, VT, N00,
12378  DAG.getConstant(lsb, DL, MVT::i32));
12379  Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
12380  DAG.getConstant(Mask2, DL, MVT::i32));
12381  DCI.CombineTo(N, Res, false);
12382  // Return value from the original node to inform the combiner that N is
12383  // now dead.
12384  return SDValue(N, 0);
12385  }
12386  }
12387 
12388  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
12389  N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
12390  ARM::isBitFieldInvertedMask(~Mask)) {
12391  // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
12392  // where lsb(mask) == #shamt and masked bits of B are known zero.
12393  SDValue ShAmt = N00.getOperand(1);
12394  unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
12395  unsigned LSB = countTrailingZeros(Mask);
12396  if (ShAmtC != LSB)
12397  return SDValue();
12398 
12399  Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
12400  DAG.getConstant(~Mask, DL, MVT::i32));
12401 
12402  DCI.CombineTo(N, Res, false);
12403  // Return value from the original node to inform the combiner that N is
12404  // now dead.
12405  return SDValue(N, 0);
12406  }
12407 
12408  return SDValue();
12409 }
12410 
12411 static bool isValidMVECond(unsigned CC, bool IsFloat) {
12412  switch (CC) {
12413  case ARMCC::EQ:
12414  case ARMCC::NE:
12415  case ARMCC::LE:
12416  case ARMCC::GT:
12417  case ARMCC::GE:
12418  case ARMCC::LT:
12419  return true;
12420  case ARMCC::HS:
12421  case ARMCC::HI:
12422  return !IsFloat;
12423  default:
12424  return false;
12425  }
12426 }
12427 
12428 static SDValue PerformORCombine_i1(SDNode *N,
12429  TargetLowering::DAGCombinerInfo &DCI,
12430  const ARMSubtarget *Subtarget) {
12431  // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
12432  // together with predicates
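 // For example (an illustrative sketch, not a specific test case):
 //   (vcmp a, b, eq) | (vcmpz c, ne)
 //     ==> xor (and (vcmp a, b, ne), (vcmpz c, eq)), all-ones
 // where the trailing xor with all-ones materializes the outer negation.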
12433  EVT VT = N->getValueType(0);
12434  SDValue N0 = N->getOperand(0);
12435  SDValue N1 = N->getOperand(1);
12436 
12437  ARMCC::CondCodes CondCode0 = ARMCC::AL;
12438  ARMCC::CondCodes CondCode1 = ARMCC::AL;
12439  if (N0->getOpcode() == ARMISD::VCMP)
12440  CondCode0 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N0->getOperand(2))
12441  ->getZExtValue();
12442  else if (N0->getOpcode() == ARMISD::VCMPZ)
12443  CondCode0 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N0->getOperand(1))
12444  ->getZExtValue();
12445  if (N1->getOpcode() == ARMISD::VCMP)
12446  CondCode1 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N1->getOperand(2))
12447  ->getZExtValue();
12448  else if (N1->getOpcode() == ARMISD::VCMPZ)
12449  CondCode1 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N1->getOperand(1))
12450  ->getZExtValue();
12451 
12452  if (CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL)
12453  return SDValue();
12454 
12455  unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0);
12456  unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1);
12457 
12458  if (!isValidMVECond(Opposite0,
12459  N0->getOperand(0)->getValueType(0).isFloatingPoint()) ||
12460  !isValidMVECond(Opposite1,
12461  N1->getOperand(0)->getValueType(0).isFloatingPoint()))
12462  return SDValue();
12463 
12464  SmallVector<SDValue, 4> Ops0;
12465  Ops0.push_back(N0->getOperand(0));
12466  if (N0->getOpcode() == ARMISD::VCMP)
12467  Ops0.push_back(N0->getOperand(1));
12468  Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32));
12469  SmallVector<SDValue, 4> Ops1;
12470  Ops1.push_back(N1->getOperand(0));
12471  if (N1->getOpcode() == ARMISD::VCMP)
12472  Ops1.push_back(N1->getOperand(1));
12473  Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32));
12474 
12475  SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0);
12476  SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1);
12477  SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1);
12478  return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And,
12479  DCI.DAG.getAllOnesConstant(SDLoc(N), VT));
12480 }
12481 
12482 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
12483 static SDValue PerformORCombine(SDNode *N,
12484  TargetLowering::DAGCombinerInfo &DCI,
12485  const ARMSubtarget *Subtarget) {
12486  // Attempt to use immediate-form VORR
12487  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
12488  SDLoc dl(N);
12489  EVT VT = N->getValueType(0);
12490  SelectionDAG &DAG = DCI.DAG;
12491 
12492  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
12493  return SDValue();
12494 
12495  APInt SplatBits, SplatUndef;
12496  unsigned SplatBitSize;
12497  bool HasAnyUndefs;
12498  if (BVN && Subtarget->hasNEON() &&
12499  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
12500  if (SplatBitSize <= 64) {
12501  EVT VorrVT;
12502  SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
12503  SplatUndef.getZExtValue(), SplatBitSize,
12504  DAG, dl, VorrVT, VT.is128BitVector(),
12505  OtherModImm);
12506  if (Val.getNode()) {
12507  SDValue Input =
12508  DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
12509  SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
12510  return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
12511  }
12512  }
12513  }
12514 
12515  if (!Subtarget->isThumb1Only()) {
12516  // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12517  if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
12518  return Result;
12519  if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
12520  return Result;
12521  }
12522 
12523  SDValue N0 = N->getOperand(0);
12524  SDValue N1 = N->getOperand(1);
12525 
12526  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
12527  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
12528  DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
12529 
12530  // The code below optimizes (or (and X, Y), Z).
12531  // The AND operand needs to have a single user to make these optimizations
12532  // profitable.
12533  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
12534  return SDValue();
12535 
12536  APInt SplatUndef;
12537  unsigned SplatBitSize;
12538  bool HasAnyUndefs;
12539 
12540  APInt SplatBits0, SplatBits1;
12541  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
12542  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
12543  // Ensure that the second operands of both ANDs are constants.
12544  if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
12545  HasAnyUndefs) && !HasAnyUndefs) {
12546  if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
12547  HasAnyUndefs) && !HasAnyUndefs) {
12548  // Ensure that the bit width of the constants are the same and that
12549  // the splat arguments are logical inverses as per the pattern we
12550  // are trying to simplify.
12551  if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
12552  SplatBits0 == ~SplatBits1) {
12553  // Canonicalize the vector type to make instruction selection
12554  // simpler.
12555  EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
12556  SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
12557  N0->getOperand(1),
12558  N0->getOperand(0),
12559  N1->getOperand(0));
12560  return DAG.getNode(ISD::BITCAST, dl, VT, Result);
12561  }
12562  }
12563  }
12564  }
12565 
12566  if (Subtarget->hasMVEIntegerOps() &&
12567  (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
12568  return PerformORCombine_i1(N, DCI, Subtarget);
12569 
12570  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
12571  // reasonable.
12572  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
12573  if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
12574  return Res;
12575  }
12576 
12577  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
12578  return Result;
12579 
12580  return SDValue();
12581 }
12582 
12583 static SDValue PerformXORCombine(SDNode *N,
12584  TargetLowering::DAGCombinerInfo &DCI,
12585  const ARMSubtarget *Subtarget) {
12586  EVT VT = N->getValueType(0);
12587  SelectionDAG &DAG = DCI.DAG;
12588 
12589  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
12590  return SDValue();
12591 
12592  if (!Subtarget->isThumb1Only()) {
12593  // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12594  if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
12595  return Result;
12596 
12597  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
12598  return Result;
12599  }
12600 
12601  return SDValue();
12602 }
12603 
12604 // ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
12605 // and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
12606 // their position in "to" (Rd).
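// A worked example with assumed operands: for (ARMISD::BFI Rd, Rn, 0xff00ffff)
// the inverted mask operand gives ToMask = 0x00ff0000 (8 bits written at bit
// 16) and FromMask = 0x000000ff (the low 8 bits of Rn supply them). If Rn were
// itself (srl X, 8), FromMask would be shifted up to 0x0000ff00 accordingly.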
12607 static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
12608  assert(N->getOpcode() == ARMISD::BFI);
12609 
12610  SDValue From = N->getOperand(1);
12611  ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
12612  FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
12613 
12614  // If the Base came from a SHR #C, we can deduce that it is really testing bit
12615  // #C in the base of the SHR.
12616  if (From->getOpcode() == ISD::SRL &&
12617  isa<ConstantSDNode>(From->getOperand(1))) {
12618  APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
12619  assert(Shift.getLimitedValue() < 32 && "Shift too large!");
12620  FromMask <<= Shift.getLimitedValue(31);
12621  From = From->getOperand(0);
12622  }
12623 
12624  return From;
12625 }
12626 
12627 // If A and B contain one contiguous set of bits, does A | B == A . B?
12628 //
12629 // Neither A nor B may be zero.
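// E.g. A = 0b1100 and B = 0b0011: A.countTrailingZeros() == 2, the highest
// set bit of B is bit 1, and 2 - 1 == 1, so the two fields abut exactly.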
12630 static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
12631  unsigned LastActiveBitInA = A.countTrailingZeros();
12632  unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
12633  return LastActiveBitInA - 1 == FirstActiveBitInB;
12634 }
12635 
12636 static SDValue FindBFIToCombineWith(SDNode *N) {
12637  // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
12638  // if one exists.
12639  APInt ToMask, FromMask;
12640  SDValue From = ParseBFI(N, ToMask, FromMask);
12641  SDValue To = N->getOperand(0);
12642 
12643  // Now check for a compatible BFI to merge with. We can pass through BFIs that
12644  // aren't compatible, but not if they set the same bit in their destination as
12645  // we do (or that of any BFI we're going to combine with).
12646  SDValue V = To;
12647  APInt CombinedToMask = ToMask;
12648  while (V.getOpcode() == ARMISD::BFI) {
12649  APInt NewToMask, NewFromMask;
12650  SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
12651  if (NewFrom != From) {
12652  // This BFI has a different base. Keep going.
12653  CombinedToMask |= NewToMask;
12654  V = V.getOperand(0);
12655  continue;
12656  }
12657 
12658  // Do the written bits conflict with any we've seen so far?
12659  if ((NewToMask & CombinedToMask).getBoolValue())
12660  // Conflicting bits - bail out because going further is unsafe.
12661  return SDValue();
12662 
12663  // Are the new bits contiguous when combined with the old bits?
12664  if (BitsProperlyConcatenate(ToMask, NewToMask) &&
12665  BitsProperlyConcatenate(FromMask, NewFromMask))
12666  return V;
12667  if (BitsProperlyConcatenate(NewToMask, ToMask) &&
12668  BitsProperlyConcatenate(NewFromMask, FromMask))
12669  return V;
12670 
12671  // We've seen a write to some bits, so track it.
12672  CombinedToMask |= NewToMask;
12673  // Keep going...
12674  V = V.getOperand(0);
12675  }
12676 
12677  return SDValue();
12678 }
12679 
12680 static SDValue PerformBFICombine(SDNode *N,
12681  TargetLowering::DAGCombinerInfo &DCI) {
12682  SDValue N1 = N->getOperand(1);
12683  if (N1.getOpcode() == ISD::AND) {
12684  // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
12685  // the bits being cleared by the AND are not demanded by the BFI.
12686  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
12687  if (!N11C)
12688  return SDValue();
12689  unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
12690  unsigned LSB = countTrailingZeros(~InvMask);
12691  unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
12692  assert(Width <
12693  static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
12694  "undefined behavior");
12695  unsigned Mask = (1u << Width) - 1;
12696  unsigned Mask2 = N11C->getZExtValue();
12697  if ((Mask & (~Mask2)) == 0)
12698  return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
12699  N->getOperand(0), N1.getOperand(0),
12700  N->getOperand(2));
12701  } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
12702  // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
12703  // Keep track of any consecutive bits set that all come from the same base
12704  // value. We can combine these together into a single BFI.
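 // As an illustration (assumed masks): one BFI writing bits 8..15 and a
 // second BFI writing bits 16..23 of the same base value can merge into a
 // single BFI writing bits 8..23, with the source shifted right below so
 // the combined field is read starting at bit 0.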
12705  SDValue CombineBFI = FindBFIToCombineWith(N);
12706  if (CombineBFI == SDValue())
12707  return SDValue();
12708 
12709  // We've found a BFI.
12710  APInt ToMask1, FromMask1;
12711  SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
12712 
12713  APInt ToMask2, FromMask2;
12714  SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
12715  assert(From1 == From2);
12716  (void)From2;
12717 
12718  // First, unlink CombineBFI.
12719  DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
12720  // Then create a new BFI, combining the two together.
12721  APInt NewFromMask = FromMask1 | FromMask2;
12722  APInt NewToMask = ToMask1 | ToMask2;
12723 
12724  EVT VT = N->getValueType(0);
12725  SDLoc dl(N);
12726 
12727  if (NewFromMask[0] == 0)
12728  From1 = DCI.DAG.getNode(
12729  ISD::SRL, dl, VT, From1,
12730  DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
12731  return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
12732  DCI.DAG.getConstant(~NewToMask, dl, VT));
12733  }
12734  return SDValue();
12735 }
12736 
12737 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
12738 /// ARMISD::VMOVRRD.
12739 static SDValue PerformVMOVRRDCombine(SDNode *N,
12740  TargetLowering::DAGCombinerInfo &DCI,
12741  const ARMSubtarget *Subtarget) {
12742  // vmovrrd(vmovdrr x, y) -> x,y
12743  SDValue InDouble = N->getOperand(0);
12744  if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
12745  return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
12746 
12747  // vmovrrd(load f64) -> (load i32), (load i32)
12748  SDNode *InNode = InDouble.getNode();
12749  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
12750  InNode->getValueType(0) == MVT::f64 &&
12751  InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
12752  !cast<LoadSDNode>(InNode)->isVolatile()) {
12753  // TODO: Should this be done for non-FrameIndex operands?
12754  LoadSDNode *LD = cast<LoadSDNode>(InNode);
12755 
12756  SelectionDAG &DAG = DCI.DAG;
12757  SDLoc DL(LD);
12758  SDValue BasePtr = LD->getBasePtr();
12759  SDValue NewLD1 =
12760  DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
12761  LD->getAlignment(), LD->getMemOperand()->getFlags());
12762 
12763  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
12764  DAG.getConstant(4, DL, MVT::i32));
12765 
12766  SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
12767  LD->getPointerInfo().getWithOffset(4),
12768  std::min(4U, LD->getAlignment()),
12769  LD->getMemOperand()->getFlags());
12770 
12771  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
12772  if (DCI.DAG.getDataLayout().isBigEndian())
12773  std::swap (NewLD1, NewLD2);
12774  SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
12775  return Result;
12776  }
12777 
12778  return SDValue();
12779 }
12780 
12781 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
12782 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
12783 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
12784  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
12785  SDValue Op0 = N->getOperand(0);
12786  SDValue Op1 = N->getOperand(1);
12787  if (Op0.getOpcode() == ISD::BITCAST)
12788  Op0 = Op0.getOperand(0);
12789  if (Op1.getOpcode() == ISD::BITCAST)
12790  Op1 = Op1.getOperand(0);
12791  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
12792  Op0.getNode() == Op1.getNode() &&
12793  Op0.getResNo() == 0 && Op1.getResNo() == 1)
12794  return DAG.getNode(ISD::BITCAST, SDLoc(N),
12795  N->getValueType(0), Op0.getOperand(0));
12796  return SDValue();
12797 }
12798 
12799 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
12800 /// are normal, non-volatile loads. If so, it is profitable to bitcast an
12801 /// i64 vector to have f64 elements, since the value can then be loaded
12802 /// directly into a VFP register.
12803 static bool hasNormalLoadOperand(SDNode *N) {
12804  unsigned NumElts = N->getValueType(0).getVectorNumElements();
12805  for (unsigned i = 0; i < NumElts; ++i) {
12806  SDNode *Elt = N->getOperand(i).getNode();
12807  if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
12808  return true;
12809  }
12810  return false;
12811 }
12812 
12813 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
12814 /// ISD::BUILD_VECTOR.
12815 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
12816  TargetLowering::DAGCombinerInfo &DCI,
12817  const ARMSubtarget *Subtarget) {
12818  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
12819  // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
12820  // into a pair of GPRs, which is fine when the value is used as a scalar,
12821  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
12822  SelectionDAG &DAG = DCI.DAG;
12823  if (N->getNumOperands() == 2)
12824  if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
12825  return RV;
12826 
12827  // Load i64 elements as f64 values so that type legalization does not split
12828  // them up into i32 values.
12829  EVT VT = N->getValueType(0);
12830  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
12831  return SDValue();
12832  SDLoc dl(N);
12833  SmallVector<SDValue, 8> Ops;
12834  unsigned NumElts = VT.getVectorNumElements();
12835  for (unsigned i = 0; i < NumElts; ++i) {
12836  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
12837  Ops.push_back(V);
12838  // Make the DAGCombiner fold the bitcast.
12839  DCI.AddToWorklist(V.getNode());
12840  }
12841  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
12842  SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
12843  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
12844 }
12845 
12846 /// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
12847 static SDValue
12848 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
12849  // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
12850  // At that time, we may have inserted bitcasts from integer to float.
12851  // If these bitcasts have survived DAGCombine, change the lowering of this
12852  // BUILD_VECTOR in something more vector friendly, i.e., that does not
12853  // force to use floating point types.
12854 
12855  // Make sure we can change the type of the vector.
12856  // This is possible iff:
12857  // 1. The vector is only used in a bitcast to a integer type. I.e.,
12858  // 1.1. Vector is used only once.
12859  // 1.2. Use is a bit convert to an integer type.
12860  // 2. The size of its operands are 32-bits (64-bits are not legal).
12861  EVT VT = N->getValueType(0);
12862  EVT EltVT = VT.getVectorElementType();
12863 
12864  // Check 1.1. and 2.
12865  if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
12866  return SDValue();
12867 
12868  // By construction, the input type must be float.
12869  assert(EltVT == MVT::f32 && "Unexpected type!");
12870 
12871  // Check 1.2.
12872  SDNode *Use = *N->use_begin();
12873  if (Use->getOpcode() != ISD::BITCAST ||
12874  Use->getValueType(0).isFloatingPoint())
12875  return SDValue();
12876 
12877  // Check profitability.
12878  // Model is, if more than half of the relevant operands are bitcast from
12879  // i32, turn the build_vector into a sequence of insert_vector_elt.
12880  // Relevant operands are everything that is not statically
12881  // (i.e., at compile time) bitcasted.
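 // E.g. (a hypothetical 4-lane case): with one constant lane and three
 // lanes bitcast from i32, NumOfRelevantElts == 3 and NumOfBitCastedElts
 // == 3, so 3 > 3 / 2 and the rewrite below is considered profitable.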
12882  unsigned NumOfBitCastedElts = 0;
12883  unsigned NumElts = VT.getVectorNumElements();
12884  unsigned NumOfRelevantElts = NumElts;
12885  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
12886  SDValue Elt = N->getOperand(Idx);
12887  if (Elt->getOpcode() == ISD::BITCAST) {
12888  // Assume only bit cast to i32 will go away.
12889  if (Elt->getOperand(0).getValueType() == MVT::i32)
12890  ++NumOfBitCastedElts;
12891  } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
12892  // Constants are statically casted, thus do not count them as
12893  // relevant operands.
12894  --NumOfRelevantElts;
12895  }
12896 
12897  // Check if more than half of the elements require a non-free bitcast.
12898  if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
12899  return SDValue();
12900 
12901  SelectionDAG &DAG = DCI.DAG;
12902  // Create the new vector type.
12903  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
12904  // Check if the type is legal.
12905  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12906  if (!TLI.isTypeLegal(VecVT))
12907  return SDValue();
12908 
12909  // Combine:
12910  // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
12911  // => BITCAST INSERT_VECTOR_ELT
12912  // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
12913  // (BITCAST EN), N.
12914  SDValue Vec = DAG.getUNDEF(VecVT);
12915  SDLoc dl(N);
12916  for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
12917  SDValue V = N->getOperand(Idx);
12918  if (V.isUndef())
12919  continue;
12920  if (V.getOpcode() == ISD::BITCAST &&
12921  V->getOperand(0).getValueType() == MVT::i32)
12922  // Fold obvious case.
12923  V = V.getOperand(0);
12924  else {
12925  V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
12926  // Make the DAGCombiner fold the bitcasts.
12927  DCI.AddToWorklist(V.getNode());
12928  }
12929  SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
12930  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
12931  }
12932  Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
12933  // Make the DAGCombiner fold the bitcasts.
12934  DCI.AddToWorklist(Vec.getNode());
12935  return Vec;
12936 }
12937 
12938 static SDValue
12939 PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
12940  EVT VT = N->getValueType(0);
12941  SDValue Op = N->getOperand(0);
12942  SDLoc dl(N);
12943 
12944  // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
12945  if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
12946  // If the valuetypes are the same, we can remove the cast entirely.
12947  if (Op->getOperand(0).getValueType() == VT)
12948  return Op->getOperand(0);
12949  return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl,
12950  Op->getOperand(0).getValueType(), Op->getOperand(0));
12951  }
12952 
12953  return SDValue();
12954 }
12955 
12956 static SDValue PerformVCMPCombine(SDNode *N,
12957  TargetLowering::DAGCombinerInfo &DCI,
12958  const ARMSubtarget *Subtarget) {
12959  if (!Subtarget->hasMVEIntegerOps())
12960  return SDValue();
12961 
12962  EVT VT = N->getValueType(0);
12963  SDValue Op0 = N->getOperand(0);
12964  SDValue Op1 = N->getOperand(1);
12965  ARMCC::CondCodes Cond =
12966  (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
12967  SDLoc dl(N);
12968 
12969  // vcmp X, 0, cc -> vcmpz X, cc
12970  if (isZeroVector(Op1))
12971  return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0,
12972  N->getOperand(2));
12973 
12974  unsigned SwappedCond = getSwappedCondition(Cond);
12975  if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
12976  // vcmp 0, X, cc -> vcmpz X, reversed(cc)
12977  if (isZeroVector(Op0))
12978  return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
12979  DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
12980  // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
12981  if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
12982  return DCI.DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
12983  DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
12984  }
12985 
12986  return SDValue();
12987 }
12988 
12989 /// PerformInsertEltCombine - Target-specific dag combine xforms for
12990 /// ISD::INSERT_VECTOR_ELT.
12991 static SDValue PerformInsertEltCombine(SDNode *N,
12992  TargetLowering::DAGCombinerInfo &DCI) {
12993  // Bitcast an i64 load inserted into a vector to f64.
12994  // Otherwise, the i64 value will be legalized to a pair of i32 values.
12995  EVT VT = N->getValueType(0);
12996  SDNode *Elt = N->getOperand(1).getNode();
12997  if (VT.getVectorElementType() != MVT::i64 ||
12998  !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
12999  return SDValue();
13000 
13001  SelectionDAG &DAG = DCI.DAG;
13002  SDLoc dl(N);
13003  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
13004  VT.getVectorNumElements());
13005  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
13006  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
13007  // Make the DAGCombiner fold the bitcasts.
13008  DCI.AddToWorklist(Vec.getNode());
13009  DCI.AddToWorklist(V.getNode());
13010  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
13011  Vec, V, N->getOperand(2));
13012  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
13013 }
13014 
13015 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
13016 /// ISD::VECTOR_SHUFFLE.
13017 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
13018  // The LLVM shufflevector instruction does not require the shuffle mask
13019  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
13020  // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
13021  // operands do not match the mask length, they are extended by concatenating
13022  // them with undef vectors. That is probably the right thing for other
13023  // targets, but for NEON it is better to concatenate two double-register
13024  // size vector operands into a single quad-register size vector. Do that
13025  // transformation here:
13026  // shuffle(concat(v1, undef), concat(v2, undef)) ->
13027  // shuffle(concat(v1, v2), undef)
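 // For example (hypothetical types): with v2i16 inputs v1 and v2, a mask of
 // <0, 1, 4, 5> over the two v4i16 concats becomes <0, 1, 2, 3> over the
 // single concat(v1, v2), as computed by the remapping loop below.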
13028  SDValue Op0 = N->getOperand(0);
13029  SDValue Op1 = N->getOperand(1);
13030  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
13031  Op1.getOpcode() != ISD::CONCAT_VECTORS ||
13032  Op0.getNumOperands() != 2 ||
13033  Op1.getNumOperands() != 2)
13034  return SDValue();
13035  SDValue Concat0Op1 = Op0.getOperand(1);
13036  SDValue Concat1Op1 = Op1.getOperand(1);
13037  if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
13038  return SDValue();
13039  // Skip the transformation if any of the types are illegal.
13040  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13041  EVT VT = N->getValueType(0);
13042  if (!TLI.isTypeLegal(VT) ||
13043  !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
13044  !TLI.isTypeLegal(Concat1Op1.getValueType()))
13045  return SDValue();
13046 
13047  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
13048  Op0.getOperand(0), Op1.getOperand(0));
13049  // Translate the shuffle mask.
13050  SmallVector<int, 16> NewMask;
13051  unsigned NumElts = VT.getVectorNumElements();
13052  unsigned HalfElts = NumElts/2;
13053  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
13054  for (unsigned n = 0; n < NumElts; ++n) {
13055  int MaskElt = SVN->getMaskElt(n);
13056  int NewElt = -1;
13057  if (MaskElt < (int)HalfElts)
13058  NewElt = MaskElt;
13059  else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
13060  NewElt = HalfElts + MaskElt - NumElts;
13061  NewMask.push_back(NewElt);
13062  }
13063  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
13064  DAG.getUNDEF(VT), NewMask);
13065 }
13066 
13067 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
13068 /// NEON load/store intrinsics, and generic vector load/stores, to merge
13069 /// base address updates.
13070 /// For generic load/stores, the memory type is assumed to be a vector.
13071 /// The caller is assumed to have checked legality.
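/// For instance (a sketch, not tied to a particular test): a load whose
/// address is also incremented,
///   vld1.32 {d0}, [r0]  followed by  add r0, r0, #8
/// can be merged into the post-indexed form vld1.32 {d0}, [r0]!.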
13072 static SDValue CombineBaseUpdate(SDNode *N,
13073  TargetLowering::DAGCombinerInfo &DCI) {
13074  SelectionDAG &DAG = DCI.DAG;
13075  const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
13076  N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
13077  const bool isStore = N->getOpcode() == ISD::STORE;
13078  const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
13079  SDValue Addr = N->getOperand(AddrOpIdx);
13080  MemSDNode *MemN = cast<MemSDNode>(N);
13081  SDLoc dl(N);
13082 
13083  // Search for a use of the address operand that is an increment.
13084  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
13085  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
13086  SDNode *User = *UI;
13087  if (User->getOpcode() != ISD::ADD ||
13088  UI.getUse().getResNo() != Addr.getResNo())
13089  continue;
13090 
13091  // Check that the add is independent of the load/store. Otherwise, folding
13092  // it would create a cycle. We can avoid searching through Addr as it's a
13093  // predecessor to both.
13094  SmallPtrSet<const SDNode *, 32> Visited;
13095  SmallVector<const SDNode *, 16> Worklist;
13096  Visited.insert(Addr.getNode());
13097  Worklist.push_back(N);
13098  Worklist.push_back(User);
13099  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
13100  SDNode::hasPredecessorHelper(User, Visited, Worklist))
13101  continue;
13102 
13103  // Find the new opcode for the updating load/store.
13104  bool isLoadOp = true;
13105  bool isLaneOp = false;
13106  unsigned NewOpc = 0;
13107  unsigned NumVecs = 0;
13108  if (isIntrinsic) {
13109  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
13110  switch (IntNo) {
13111  default: llvm_unreachable("unexpected intrinsic for Neon base update");
13112  case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
13113  NumVecs = 1; break;
13114  case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
13115  NumVecs = 2; break;
13116  case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
13117  NumVecs = 3; break;
13118  case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
13119  NumVecs = 4; break;
13120  case Intrinsic::arm_neon_vld2dup:
13121  case Intrinsic::arm_neon_vld3dup:
13122  case Intrinsic::arm_neon_vld4dup:
13123  // TODO: Support updating VLDxDUP nodes. For now, we just skip
13124  // combining base updates for such intrinsics.
13125  continue;
13126  case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
13127  NumVecs = 2; isLaneOp = true; break;
13128  case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
13129  NumVecs = 3; isLaneOp = true; break;
13130  case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
13131  NumVecs = 4; isLaneOp = true; break;
13132  case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
13133  NumVecs = 1; isLoadOp = false; break;
13134  case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
13135  NumVecs = 2; isLoadOp = false; break;
13136  case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
13137  NumVecs = 3; isLoadOp = false; break;
13138  case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
13139  NumVecs = 4; isLoadOp = false; break;
13140  case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
13141  NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
13142  case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
13143  NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
13144  case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
13145  NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
13146  }
13147  } else {
13148  isLaneOp = true;
13149  switch (N->getOpcode()) {
13150  default: llvm_unreachable("unexpected opcode for Neon base update");
13151  case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
13152  case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
13153  case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
13154  case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
13155  case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
13156  NumVecs = 1; isLaneOp = false; break;
13157  case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
13158  NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
13159  }
13160  }
13161 
13162  // Find the size of memory referenced by the load/store.
13163  EVT VecTy;
13164  if (isLoadOp) {
13165  VecTy = N->getValueType(0);
13166  } else if (isIntrinsic) {
13167  VecTy = N->getOperand(AddrOpIdx+1).getValueType();
13168  } else {
13169  assert(isStore && "Node has to be a load, a store, or an intrinsic!");
13170  VecTy = N->getOperand(1).getValueType();
13171  }
13172 
13173  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
13174  if (isLaneOp)
13175  NumBytes /= VecTy.getVectorNumElements();
13176 
13177  // If the increment is a constant, it must match the memory ref size.
13178  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
13179  ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
13180  if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
13181  // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
13182  // separate instructions that make it harder to use a non-constant update.
13183  continue;
13184  }
13185 
13186  // OK, we found an ADD we can fold into the base update.
13187  // Now, create a _UPD node, taking care of not breaking alignment.
13188 
13189  EVT AlignedVecTy = VecTy;
13190  unsigned Alignment = MemN->getAlignment();
13191 
13192  // If this is a less-than-standard-aligned load/store, change the type to
13193  // match the standard alignment.
13194  // The alignment is overlooked when selecting _UPD variants; and it's
13195  // easier to introduce bitcasts here than fix that.
13196  // There are 3 ways to get to this base-update combine:
13197  // - intrinsics: they are assumed to be properly aligned (to the standard
13198  // alignment of the memory type), so we don't need to do anything.
13199  // - ARMISD::VLDx nodes: they are only generated from the aforementioned
13200  // intrinsics, so, likewise, there's nothing to do.
13201  // - generic load/store instructions: the alignment is specified as an
13202  // explicit operand, rather than implicitly as the standard alignment
13203  // of the memory type (like the intrinsics). We need to change the
13204  // memory type to match the explicit alignment. That way, we don't
13205  // generate non-standard-aligned ARMISD::VLDx nodes.
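 // E.g. (an assumed case): a 16-byte v4i32 load with MMO alignment 2 is
 // retyped here to v8i16 so the resulting VLD1_UPD stays standard-aligned.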
13206  if (isa<LSBaseSDNode>(N)) {
13207  if (Alignment == 0)
13208  Alignment = 1;
13209  if (Alignment < VecTy.getScalarSizeInBits() / 8) {
13210  MVT EltTy = MVT::getIntegerVT(Alignment * 8);
13211  assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
13212  assert(!isLaneOp && "Unexpected generic load/store lane.");
13213  unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
13214  AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
13215  }
13216  // Don't set an explicit alignment on regular load/stores that we want
13217  // to transform to VLD/VST 1_UPD nodes.
13218  // This matches the behavior of regular load/stores, which only get an
13219  // explicit alignment if the MMO alignment is larger than the standard
13220  // alignment of the memory type.
13221  // Intrinsics, however, always get an explicit alignment, set to the
13222  // alignment of the MMO.
13223  Alignment = 1;
13224  }
13225 
13226  // Create the new updating load/store node.
13227  // First, create an SDVTList for the new updating node's results.
13228  EVT Tys[6];
13229  unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
13230  unsigned n;
13231  for (n = 0; n < NumResultVecs; ++n)
13232  Tys[n] = AlignedVecTy;
13233  Tys[n++] = MVT::i32;
13234  Tys[n] = MVT::Other;
13235  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
13236 
13237  // Then, gather the new node's operands.
13238  SmallVector<SDValue, 8> Ops;
13239  Ops.push_back(N->getOperand(0)); // incoming chain
13240  Ops.push_back(N->getOperand(AddrOpIdx));
13241  Ops.push_back(Inc);
13242 
13243  if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
13244  // Try to match the intrinsic's signature
13245  Ops.push_back(StN->getValue());
13246  } else {
13247  // Loads (and of course intrinsics) match the intrinsics' signature,
13248  // so just add all but the alignment operand.
13249  for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
13250  Ops.push_back(N->getOperand(i));
13251  }
13252 
13253  // For all node types, the alignment operand is always the last one.
13254  Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
13255 
13256  // If this is a non-standard-aligned STORE, the penultimate operand is the
13257  // stored value. Bitcast it to the aligned type.
13258  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
13259  SDValue &StVal = Ops[Ops.size()-2];
13260  StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
13261  }
13262 
13263  EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
13264  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
13265  MemN->getMemOperand());
13266 
13267  // Update the uses.
13268  SmallVector<SDValue, 5> NewResults;
13269  for (unsigned i = 0; i < NumResultVecs; ++i)
13270  NewResults.push_back(SDValue(UpdN.getNode(), i));
13271 
13272  // If this is a non-standard-aligned LOAD, the first result is the loaded
13273  // value. Bitcast it to the expected result type.
13274  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
13275  SDValue &LdVal = NewResults[0];
13276  LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
13277  }
13278 
13279  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
13280  DCI.CombineTo(N, NewResults);
13281  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
13282 
13283  break;
13284  }
13285  return SDValue();
13286 }
13287 
13288 static SDValue PerformVLDCombine(SDNode *N,
13289  TargetLowering::DAGCombinerInfo &DCI) {
13290  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13291  return SDValue();
13292 
13293  return CombineBaseUpdate(N, DCI);
13294 }
13295 
13296 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
13297 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
13298 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
13299 /// return true.
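/// E.g. (a sketch): if every vector result of a vld2lane is consumed only
/// by VDUPLANE nodes selecting the loaded lane, the group collapses into a
/// single vld2dup that replicates the element across all lanes directly.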
13300 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
13301  SelectionDAG &DAG = DCI.DAG;
13302  EVT VT = N->getValueType(0);
13303  // vldN-dup instructions only support 64-bit vectors for N > 1.
13304  if (!VT.is64BitVector())
13305  return false;
13306 
13307  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
13308  SDNode *VLD = N->getOperand(0).getNode();
13309  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
13310  return false;
13311  unsigned NumVecs = 0;
13312  unsigned NewOpc = 0;
13313  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
13314  if (IntNo == Intrinsic::arm_neon_vld2lane) {
13315  NumVecs = 2;
13316  NewOpc = ARMISD::VLD2DUP;
13317  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
13318  NumVecs = 3;
13319  NewOpc = ARMISD::VLD3DUP;
13320  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
13321  NumVecs = 4;
13322  NewOpc = ARMISD::VLD4DUP;
13323  } else {
13324  return false;
13325  }
13326 
13327  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
13328  // numbers match the load.
13329  unsigned VLDLaneNo =
13330  cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
13331  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
13332  UI != UE; ++UI) {
13333  // Ignore uses of the chain result.
13334  if (UI.getUse().getResNo() == NumVecs)
13335  continue;
13336  SDNode *User = *UI;
13337  if (User->getOpcode() != ARMISD::VDUPLANE ||
13338  VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
13339  return false;
13340  }
13341 
13342  // Create the vldN-dup node.
13343  EVT Tys[5];
13344  unsigned n;
13345  for (n = 0; n < NumVecs; ++n)
13346  Tys[n] = VT;
13347  Tys[n] = MVT::Other;
13348  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
13349  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
13350  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
13351  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
13352  Ops, VLDMemInt->getMemoryVT(),
13353  VLDMemInt->getMemOperand());
13354 
13355  // Update the uses.
13356  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
13357  UI != UE; ++UI) {
13358  unsigned ResNo = UI.getUse().getResNo();
13359  // Ignore uses of the chain result.
13360  if (ResNo == NumVecs)
13361  continue;
13362  SDNode *User = *UI;
13363  DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
13364  }
13365 
13366  // Now the vldN-lane intrinsic is dead except for its chain result.
13367  // Update uses of the chain.
13368  std::vector<SDValue> VLDDupResults;
13369  for (unsigned n = 0; n < NumVecs; ++n)
13370  VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
13371  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
13372  DCI.CombineTo(VLD, VLDDupResults);
13373 
13374  return true;
13375 }
13376 
13377 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
13378 /// ARMISD::VDUPLANE.
13379 static SDValue PerformVDUPLANECombine(SDNode *N,
13380  TargetLowering::DAGCombinerInfo &DCI) {
13381  SDValue Op = N->getOperand(0);
13382 
13383  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
13384  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
13385  if (CombineVLDDUP(N, DCI))
13386  return SDValue(N, 0);
13387 
13388  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
13389  // redundant. Ignore bit_converts for now; element sizes are checked below.
13390  while (Op.getOpcode() == ISD::BITCAST)
13391  Op = Op.getOperand(0);
13392  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
13393  return SDValue();
13394 
13395  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
13396  unsigned EltSize = Op.getScalarValueSizeInBits();
13397  // The canonical VMOV for a zero vector uses a 32-bit element size.
13398  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
13399  unsigned EltBits;
13400  if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
13401  EltSize = 8;
13402  EVT VT = N->getValueType(0);
13403  if (EltSize > VT.getScalarSizeInBits())
13404  return SDValue();
13405 
13406  return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
13407 }
13408 
13409 /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
13410 static SDValue PerformVDUPCombine(SDNode *N,
13411  TargetLowering::DAGCombinerInfo &DCI,
13412  const ARMSubtarget *Subtarget) {
13413  SelectionDAG &DAG = DCI.DAG;
13414  SDValue Op = N->getOperand(0);
13415 
13416  if (!Subtarget->hasNEON())
13417  return SDValue();
13418 
13419  // Match VDUP(LOAD) -> VLD1DUP.
13420  // We match this pattern here rather than waiting for isel because the
13421  // transform is only legal for unindexed loads.
13422  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
13423  if (LD && Op.hasOneUse() && LD->isUnindexed() &&
13424  LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
13425  SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
13426  DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
13427  SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
13428  SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
13429  Ops, LD->getMemoryVT(),
13430  LD->getMemOperand());
13431  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
13432  return VLDDup;
13433  }
13434 
13435  return SDValue();
13436 }
13437 
13438 static SDValue PerformLOADCombine(SDNode *N,
13439  TargetLowering::DAGCombinerInfo &DCI) {
13440  EVT VT = N->getValueType(0);
13441 
13442  // If this is a legal vector load, try to combine it into a VLD1_UPD.
13443  if (ISD::isNormalLoad(N) && VT.isVector() &&
13444  DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13445  return CombineBaseUpdate(N, DCI);
13446 
13447  return SDValue();
13448 }
13449 
13450 // Optimize trunc store (of multiple scalars) to shuffle and store. First,
13451 // pack all of the elements in one place. Next, store to memory in fewer
13452 // chunks.
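// For example (hypothetical types): a truncating store of v4i32 to v4i16
// becomes a v8i16 shuffle that packs the four halfwords into the low lanes,
// followed by a single i64 store of the packed data.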
13453 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
13454  SelectionDAG &DAG) {
13455  SDValue StVal = St->getValue();
13456  EVT VT = StVal.getValueType();
13457  if (!St->isTruncatingStore() || !VT.isVector())
13458  return SDValue();
13459  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13460  EVT StVT = St->getMemoryVT();
13461  unsigned NumElems = VT.getVectorNumElements();
13462  assert(StVT != VT && "Cannot truncate to the same type");
13463  unsigned FromEltSz = VT.getScalarSizeInBits();
13464  unsigned ToEltSz = StVT.getScalarSizeInBits();
13465 
13466  // From, To sizes and ElemCount must be pow of two
13467  if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
13468  return SDValue();
13469 
13470  // We are going to use the original vector elt for storing.
13471  // Accumulated smaller vector elements must be a multiple of the store size.
13472  if (0 != (NumElems * FromEltSz) % ToEltSz)
13473  return SDValue();
13474 
13475  unsigned SizeRatio = FromEltSz / ToEltSz;
13476  assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
13477 
13478  // Create a type on which we perform the shuffle.
13479  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
13480  NumElems * SizeRatio);
13481  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
13482 
13483  SDLoc DL(St);
13484  SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
13485  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
13486  for (unsigned i = 0; i < NumElems; ++i)
13487  ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
13488  : i * SizeRatio;
13489 
13490  // Can't shuffle using an illegal type.
13491  if (!TLI.isTypeLegal(WideVecVT))
13492  return SDValue();
13493 
13494  SDValue Shuff = DAG.getVectorShuffle(
13495  WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
13496  // At this point all of the data is stored at the bottom of the
13497  // register. We now need to save it to mem.
13498 
13499  // Find the largest store unit
13500  MVT StoreType = MVT::i8;
13501  for (MVT Tp : MVT::integer_valuetypes()) {
13502  if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
13503  StoreType = Tp;
13504  }
13505  // Didn't find a legal store type.
13506  if (!TLI.isTypeLegal(StoreType))
13507  return SDValue();
13508 
13509  // Bitcast the original vector into a vector of store-size units
13510  EVT StoreVecVT =
13511  EVT::getVectorVT(*DAG.getContext(), StoreType,
13512  VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
13513  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
13514  SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
13515  SmallVector<SDValue, 8> Chains;
13516  SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
13517  TLI.getPointerTy(DAG.getDataLayout()));
13518  SDValue BasePtr = St->getBasePtr();
13519 
13520  // Perform one or more big stores into memory.
13521  unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
13522  for (unsigned I = 0; I < E; I++) {
13523  SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
13524  ShuffWide, DAG.getIntPtrConstant(I, DL));
13525  SDValue Ch =
13526  DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
13527  St->getAlignment(), St->getMemOperand()->getFlags());
13528  BasePtr =
13529  DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
13530  Chains.push_back(Ch);
13531  }
13532  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13533 }
13534 
13535 // Try taking a single vector store from a truncate (which would otherwise turn
13536 // into an expensive buildvector) and splitting it into a series of narrowing
13537 // stores.
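// E.g. (an assumed MVE case): store (trunc v16i16 x to v16i8) becomes two
// truncating stores of the v8i16 halves, each narrowing to v8i8 in one step.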
13538 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
13539  SelectionDAG &DAG) {
13540  if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
13541  return SDValue();
13542  SDValue Trunc = St->getValue();
13543  if (Trunc->getOpcode() != ISD::TRUNCATE)
13544  return SDValue();
13545  EVT FromVT = Trunc->getOperand(0).getValueType();
13546  EVT ToVT = Trunc.getValueType();
13547  if (!ToVT.isVector())
13548  return SDValue();
13549  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
13550  EVT ToEltVT = ToVT.getVectorElementType();
13551  EVT FromEltVT = FromVT.getVectorElementType();
13552 
13553  unsigned NumElements = 0;
13554  if (FromEltVT == MVT::i32 && (ToEltVT == MVT::i16 || ToEltVT == MVT::i8))
13555  NumElements = 4;
13556  if (FromEltVT == MVT::i16 && ToEltVT == MVT::i8)
13557  NumElements = 8;
13558  if (NumElements == 0 || FromVT.getVectorNumElements() == NumElements ||
13559  FromVT.getVectorNumElements() % NumElements != 0)
13560  return SDValue();
13561 
13562  SDLoc DL(St);
13563  // Details about the old store
13564  SDValue Ch = St->getChain();
13565  SDValue BasePtr = St->getBasePtr();
13566  unsigned Alignment = St->getOriginalAlignment();
13567  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
13568  AAMDNodes AAInfo = St->getAAInfo();
13569 
13570  EVT NewFromVT = EVT::getVectorVT(*DAG.getContext(), FromEltVT, NumElements);
13571  EVT NewToVT = EVT::getVectorVT(*DAG.getContext(), ToEltVT, NumElements);
13572 
13573  SmallVector<SDValue, 4> Stores;
13574  for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
13575  unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
13576  SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset);
13577 
13578  SDValue Extract =
13579  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
13580  DAG.getConstant(i * NumElements, DL, MVT::i32));
13581  SDValue Store = DAG.getTruncStore(
13582  Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
13583  NewToVT, Alignment, MMOFlags, AAInfo);
13584  Stores.push_back(Store);
13585  }
13586  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
13587 }
13588 
13589 /// PerformSTORECombine - Target-specific dag combine xforms for
13590 /// ISD::STORE.
13591 static SDValue PerformSTORECombine(SDNode *N,
13592  TargetLowering::DAGCombinerInfo &DCI,
13593  const ARMSubtarget *Subtarget) {
13594  StoreSDNode *St = cast<StoreSDNode>(N);
13595  if (St->isVolatile())
13596  return SDValue();
13597  SDValue StVal = St->getValue();
13598  EVT VT = StVal.getValueType();
13599 
13600  if (Subtarget->hasNEON())
13601  if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
13602  return Store;
13603 
13604  if (Subtarget->hasMVEIntegerOps())
13605  if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
13606  return NewToken;
13607 
13608  if (!ISD::isNormalStore(St))
13609  return SDValue();
13610 
13611  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
13612  // ARM stores of arguments in the same cache line.
13613  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
13614  StVal.getNode()->hasOneUse()) {
13615  SelectionDAG &DAG = DCI.DAG;
13616  bool isBigEndian = DAG.getDataLayout().isBigEndian();
13617  SDLoc DL(St);
13618  SDValue BasePtr = St->getBasePtr();
13619  SDValue NewST1 = DAG.getStore(
13620  St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
13621  BasePtr, St->getPointerInfo(), St->getAlignment(),
13622  St->getMemOperand()->getFlags());
13623 
13624  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
13625  DAG.getConstant(4, DL, MVT::i32));
13626  return DAG.getStore(NewST1.getValue(0), DL,
13627  StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
13628  OffsetPtr, St->getPointerInfo(),
13629  std::min(4U, St->getAlignment() / 2),
13630  St->getMemOperand()->getFlags());
13631  }
13632 
13633  if (StVal.getValueType() == MVT::i64 &&
13634  StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13635 
13636  // Bitcast an i64 store extracted from a vector to f64.
13637  // Otherwise, the i64 value will be legalized to a pair of i32 values.
13638  SelectionDAG &DAG = DCI.DAG;
13639  SDLoc dl(StVal);
13640  SDValue IntVec = StVal.getOperand(0);
13641  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
13642  IntVec.getValueType().getVectorNumElements());
13643  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
13644  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
13645  Vec, StVal.getOperand(1));
13646  dl = SDLoc(N);
13647  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
13648  // Make the DAGCombiner fold the bitcasts.
13649  DCI.AddToWorklist(Vec.getNode());
13650  DCI.AddToWorklist(ExtElt.getNode());
13651  DCI.AddToWorklist(V.getNode());
13652  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
13653  St->getPointerInfo(), St->getAlignment(),
13654  St->getMemOperand()->getFlags(), St->getAAInfo());
13655  }
13656 
13657  // If this is a legal vector store, try to combine it into a VST1_UPD.
13658  if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
13659  DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13660  return CombineBaseUpdate(N, DCI);
13661 
13662  return SDValue();
13663 }
13664 
13665 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
13666 /// can replace combinations of VMUL and VCVT (floating-point to integer)
13667 /// when the VMUL has a constant operand that is a power of 2.
13668 ///
13669 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
13670 /// vmul.f32 d16, d17, d16
13671 /// vcvt.s32.f32 d16, d16
13672 /// becomes:
13673 /// vcvt.s32.f32 d16, d16, #3
13674 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
13675  const ARMSubtarget *Subtarget) {
13676  if (!Subtarget->hasNEON())
13677  return SDValue();
13678 
13679  SDValue Op = N->getOperand(0);
13680  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
13681  Op.getOpcode() != ISD::FMUL)
13682  return SDValue();
13683 
13684  SDValue ConstVec = Op->getOperand(1);
13685  if (!isa<BuildVectorSDNode>(ConstVec))
13686  return SDValue();
13687 
13688  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
13689  uint32_t FloatBits = FloatTy.getSizeInBits();
13690  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
13691  uint32_t IntBits = IntTy.getSizeInBits();
13692  unsigned NumLanes = Op.getValueType().getVectorNumElements();
13693  if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
13694  // These instructions only exist converting from f32 to i32. We can handle
13695  // smaller integers by generating an extra truncate, but larger ones would
13696  // be lossy. We also can't handle anything other than 2 or 4 lanes, since
13697  // these instructions only support v2i32/v4i32 types.
13698  return SDValue();
13699  }
13700 
13701  BitVector UndefElements;
13702  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
13703  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
13704  if (C == -1 || C == 0 || C > 32)
13705  return SDValue();
13706 
13707  SDLoc dl(N);
13708  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
13709  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
13710  Intrinsic::arm_neon_vcvtfp2fxu;
13711  SDValue FixConv = DAG.getNode(
13712  ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
13713  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
13714  DAG.getConstant(C, dl, MVT::i32));
13715 
13716  if (IntBits < FloatBits)
13717  FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
13718 
13719  return FixConv;
13720 }
13721 
13722 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
13723 /// can replace combinations of VCVT (integer to floating-point) and VDIV
13724 /// when the VDIV has a constant operand that is a power of 2.
13725 ///
13726 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
13727 /// vcvt.f32.s32 d16, d16
13728 /// vdiv.f32 d16, d17, d16
13729 /// becomes:
13730 /// vcvt.f32.s32 d16, d16, #3
13731 static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
13732  const ARMSubtarget *Subtarget) {
13733  if (!Subtarget->hasNEON())
13734  return SDValue();
13735 
13736  SDValue Op = N->getOperand(0);
13737  unsigned OpOpcode = Op.getNode()->getOpcode();
13738  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
13739  (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
13740  return SDValue();
13741 
13742  SDValue ConstVec = N->getOperand(1);
13743  if (!isa<BuildVectorSDNode>(ConstVec))
13744  return SDValue();
13745 
13746  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
13747  uint32_t FloatBits = FloatTy.getSizeInBits();
13748  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
13749  uint32_t IntBits = IntTy.getSizeInBits();
13750  unsigned NumLanes = Op.getValueType().getVectorNumElements();
13751  if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
13752  // These instructions only exist converting from i32 to f32. We can handle
13753  // smaller integers by generating an extra extend, but larger ones would
13754  // be lossy. We also can't handle anything other than 2 or 4 lanes, since
13755  // these instructions only support v2i32/v4i32 types.
13756  return SDValue();
13757  }
13758 
13759  BitVector UndefElements;
13760  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
13761  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
13762  if (C == -1 || C == 0 || C > 32)
13763  return SDValue();
13764 
13765  SDLoc dl(N);
13766  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
13767  SDValue ConvInput = Op.getOperand(0);
13768  if (IntBits < FloatBits)
13769  ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
13770  dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
13771  ConvInput);
13772 
13773  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
13774  Intrinsic::arm_neon_vcvtfxu2fp;
13775  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
13776  Op.getValueType(),
13777  DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
13778  ConvInput, DAG.getConstant(C, dl, MVT::i32));
13779 }
13780 
13781 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
13782 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
13783  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
13784  switch (IntNo) {
13785  default:
13786  // Don't do anything for most intrinsics.
13787  break;
13788 
13789  // Vector shifts: check for immediate versions and lower them.
13790  // Note: This is done during DAG combining instead of DAG legalizing because
13791  // the build_vectors for 64-bit vector element shift counts are generally
13792  // not legal, and it is hard to see their values after they get legalized to
13793  // loads from a constant pool.
13794  case Intrinsic::arm_neon_vshifts:
13795  case Intrinsic::arm_neon_vshiftu:
13796  case Intrinsic::arm_neon_vrshifts:
13797  case Intrinsic::arm_neon_vrshiftu:
13798  case Intrinsic::arm_neon_vrshiftn:
13799  case Intrinsic::arm_neon_vqshifts:
13800  case Intrinsic::arm_neon_vqshiftu:
13801  case Intrinsic::arm_neon_vqshiftsu:
13802  case Intrinsic::arm_neon_vqshiftns:
13803  case Intrinsic::arm_neon_vqshiftnu:
13804  case Intrinsic::arm_neon_vqshiftnsu:
13805  case Intrinsic::arm_neon_vqrshiftns:
13806  case Intrinsic::arm_neon_vqrshiftnu:
13807  case Intrinsic::arm_neon_vqrshiftnsu: {
13808  EVT VT = N->getOperand(1).getValueType();
13809  int64_t Cnt;
13810  unsigned VShiftOpc = 0;
13811 
13812  switch (IntNo) {
13813  case Intrinsic::arm_neon_vshifts:
13814  case Intrinsic::arm_neon_vshiftu:
13815  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
13816  VShiftOpc = ARMISD::VSHLIMM;
13817  break;
13818  }
13819  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
13820  VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
13821  : ARMISD::VSHRuIMM);
13822  break;
13823  }
13824  return SDValue();
13825 
13826  case Intrinsic::arm_neon_vrshifts:
13827  case Intrinsic::arm_neon_vrshiftu:
13828  if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
13829  break;
13830  return SDValue();
13831 
13832  case Intrinsic::arm_neon_vqshifts:
13833  case Intrinsic::arm_neon_vqshiftu:
13834  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
13835  break;
13836  return SDValue();
13837 
13838  case Intrinsic::arm_neon_vqshiftsu:
13839  if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
13840  break;
13841  llvm_unreachable("invalid shift count for vqshlu intrinsic");
13842 
13843  case Intrinsic::arm_neon_vrshiftn:
13844  case Intrinsic::arm_neon_vqshiftns:
13845  case Intrinsic::arm_neon_vqshiftnu:
13846  case Intrinsic::arm_neon_vqshiftnsu:
13847  case Intrinsic::arm_neon_vqrshiftns:
13848  case Intrinsic::arm_neon_vqrshiftnu:
13849  case Intrinsic::arm_neon_vqrshiftnsu:
13850  // Narrowing shifts require an immediate right shift.
13851  if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
13852  break;
13853  llvm_unreachable("invalid shift count for narrowing vector shift "
13854  "intrinsic");
13855 
13856  default:
13857  llvm_unreachable("unhandled vector shift");
13858  }
13859 
13860  switch (IntNo) {
13861  case Intrinsic::arm_neon_vshifts:
13862  case Intrinsic::arm_neon_vshiftu:
13863  // Opcode already set above.
13864  break;
13865  case Intrinsic::arm_neon_vrshifts:
13866  VShiftOpc = ARMISD::VRSHRsIMM;
13867  break;
13868  case Intrinsic::arm_neon_vrshiftu:
13869  VShiftOpc = ARMISD::VRSHRuIMM;
13870  break;
13871  case Intrinsic::arm_neon_vrshiftn:
13872  VShiftOpc = ARMISD::VRSHRNIMM;
13873  break;
13874  case Intrinsic::arm_neon_vqshifts:
13875  VShiftOpc = ARMISD::VQSHLsIMM;
13876  break;
13877  case Intrinsic::arm_neon_vqshiftu:
13878  VShiftOpc = ARMISD::VQSHLuIMM;
13879  break;
13880  case Intrinsic::arm_neon_vqshiftsu:
13881  VShiftOpc = ARMISD::VQSHLsuIMM;
13882  break;
13883  case Intrinsic::arm_neon_vqshiftns:
13884  VShiftOpc = ARMISD::VQSHRNsIMM;
13885  break;
13886  case Intrinsic::arm_neon_vqshiftnu:
13887  VShiftOpc = ARMISD::VQSHRNuIMM;
13888  break;
13889  case Intrinsic::arm_neon_vqshiftnsu:
13890  VShiftOpc = ARMISD::VQSHRNsuIMM;
13891  break;
13892  case Intrinsic::arm_neon_vqrshiftns:
13893  VShiftOpc = ARMISD::VQRSHRNsIMM;
13894  break;
13895  case Intrinsic::arm_neon_vqrshiftnu:
13896  VShiftOpc = ARMISD::VQRSHRNuIMM;
13897  break;
13898  case Intrinsic::arm_neon_vqrshiftnsu:
13899  VShiftOpc = ARMISD::VQRSHRNsuIMM;
13900  break;
13901  }
13902 
13903  SDLoc dl(N);
13904  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
13905  N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
13906  }
13907 
13908  case Intrinsic::arm_neon_vshiftins: {
13909  EVT VT = N->getOperand(1).getValueType();
13910  int64_t Cnt;
13911  unsigned VShiftOpc = 0;
13912 
13913  if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
13914  VShiftOpc = ARMISD::VSLIIMM;
13915  else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
13916  VShiftOpc = ARMISD::VSRIIMM;
13917  else {
13918  llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
13919  }
13920 
13921  SDLoc dl(N);
13922  return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
13923  N->getOperand(1), N->getOperand(2),
13924  DAG.getConstant(Cnt, dl, MVT::i32));
13925  }
13926 
13927  case Intrinsic::arm_neon_vqrshifts:
13928  case Intrinsic::arm_neon_vqrshiftu:
13929  // No immediate versions of these to check for.
13930  break;
13931  }
13932 
13933  return SDValue();
13934 }
13935 
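// Example (illustrative; the values are hypothetical): a shift intrinsic whose
// count operand is a constant build_vector, such as
//   %r = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %x,
//            <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
// passes the isVShiftLImm check above and is rewritten to VSHLIMM with an
// immediate count, i.e. a single vshl.s32 q0, q0, #3.
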
13936 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
13937 /// lowers them. As with the vector shift intrinsics, this is done during DAG
13938 /// combining instead of DAG legalizing because the build_vectors for 64-bit
13939 /// vector element shift counts are generally not legal, and it is hard to see
13940 /// their values after they get legalized to loads from a constant pool.
13941 static SDValue PerformShiftCombine(SDNode *N,
13942  TargetLowering::DAGCombinerInfo &DCI,
13943  const ARMSubtarget *ST) {
13944  SelectionDAG &DAG = DCI.DAG;
13945  EVT VT = N->getValueType(0);
13946  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
13947  // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
13948  // 16 bits of x are zero. This optimizes rev + lsr 16 to rev16.
13949  SDValue N1 = N->getOperand(1);
13950  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
13951  SDValue N0 = N->getOperand(0);
13952  if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
13953  DAG.MaskedValueIsZero(N0.getOperand(0),
13954  APInt::getHighBitsSet(32, 16)))
13955  return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
13956  }
13957  }
13958 
13959  if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
13960  N->getOperand(0)->getOpcode() == ISD::AND &&
13961  N->getOperand(0)->hasOneUse()) {
13962  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13963  return SDValue();
13964  // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
13965  // usually show up because instcombine prefers to canonicalize it to
13966  // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
13967  // out of GEP lowering in some cases.
13968  SDValue N0 = N->getOperand(0);
13969  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13970  if (!ShiftAmtNode)
13971  return SDValue();
13972  uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
13973  ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13974  if (!AndMaskNode)
13975  return SDValue();
13976  uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
13977  // Don't transform uxtb/uxth.
13978  if (AndMask == 255 || AndMask == 65535)
13979  return SDValue();
13980  if (isMask_32(AndMask)) {
13981  uint32_t MaskedBits = countLeadingZeros(AndMask);
13982  if (MaskedBits > ShiftAmt) {
13983  SDLoc DL(N);
13984  SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
13985  DAG.getConstant(MaskedBits, DL, MVT::i32));
13986  return DAG.getNode(
13987  ISD::SRL, DL, MVT::i32, SHL,
13988  DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
13989  }
13990  }
13991  }
13992 
13993  // Nothing to be done for scalar shifts.
13994  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13995  if (!VT.isVector() || !TLI.isTypeLegal(VT))
13996  return SDValue();
13997  if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
13998  return SDValue();
13999 
14000  int64_t Cnt;
14001 
14002  switch (N->getOpcode()) {
14003  default: llvm_unreachable("unexpected shift opcode");
14004 
14005  case ISD::SHL:
14006  if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
14007  SDLoc dl(N);
14008  return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
14009  DAG.getConstant(Cnt, dl, MVT::i32));
14010  }
14011  break;
14012 
14013  case ISD::SRA:
14014  case ISD::SRL:
14015  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
14016  unsigned VShiftOpc =
14017  (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
14018  SDLoc dl(N);
14019  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
14020  DAG.getConstant(Cnt, dl, MVT::i32));
14021  }
14022  }
14023  return SDValue();
14024 }
14025 
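// Example (illustrative, assuming a hypothetical function f): the rev16
// canonicalization above applies to C code like
//   uint32_t f(uint32_t x) { return __builtin_bswap32(x & 0xffff) >> 16; }
// The bswap input has its high 16 bits known zero, so (srl (bswap x), 16)
// becomes (rotr (bswap x), 16) and selects to a single rev16.
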
14026 // Look for a sign/zero extend of a larger than legal load. This can be split
14027 // into two extending loads, which are simpler to deal with than an arbitrary
14028 // sign extend.
14029 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
14030  SDValue N0 = N->getOperand(0);
14031  if (N0.getOpcode() != ISD::LOAD)
14032  return SDValue();
14033  LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
14034  if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
14035  LD->getExtensionType() != ISD::NON_EXTLOAD)
14036  return SDValue();
14037  EVT FromVT = LD->getValueType(0);
14038  EVT ToVT = N->getValueType(0);
14039  if (!ToVT.isVector())
14040  return SDValue();
14041  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
14042  EVT ToEltVT = ToVT.getVectorElementType();
14043  EVT FromEltVT = FromVT.getVectorElementType();
14044 
14045  unsigned NumElements = 0;
14046  if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
14047  NumElements = 4;
14048  if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
14049  NumElements = 8;
14050  if (NumElements == 0 ||
14051  FromVT.getVectorNumElements() == NumElements ||
14052  FromVT.getVectorNumElements() % NumElements != 0 ||
14053  !isPowerOf2_32(NumElements))
14054  return SDValue();
14055 
14056  SDLoc DL(LD);
14057  // Details about the old load
14058  SDValue Ch = LD->getChain();
14059  SDValue BasePtr = LD->getBasePtr();
14060  unsigned Alignment = LD->getOriginalAlignment();
14061  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
14062  AAMDNodes AAInfo = LD->getAAInfo();
14063 
14064  ISD::LoadExtType NewExtType =
14065  N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14066  SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
14067  EVT NewFromVT = FromVT.getHalfNumVectorElementsVT(*DAG.getContext());
14068  EVT NewToVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
14069  unsigned NewOffset = NewFromVT.getSizeInBits() / 8;
14070  SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset);
14071 
14072  // Split the load in half, each side of which is extended separately. This
14073  // is good enough, as legalisation will take it from there. They are either
14074  // already legal or they will be split further into something that is
14075  // legal.
14076  SDValue NewLoad1 =
14077  DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, BasePtr, Offset,
14078  LD->getPointerInfo(), NewFromVT, Alignment, MMOFlags, AAInfo);
14079  SDValue NewLoad2 =
14080  DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
14081  LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
14082  Alignment, MMOFlags, AAInfo);
14083 
14084  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14085  SDValue(NewLoad1.getNode(), 1),
14086  SDValue(NewLoad2.getNode(), 1));
14087  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
14088  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, NewLoad1, NewLoad2);
14089 }
14090 
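// Example (illustrative): a (zext (load <8 x i8>)) to <8 x i32> is wider than
// any legal MVE result type, so the splitting above produces two
// <4 x i8> -> <4 x i32> extending loads, the second at a 4-byte offset, and
// joins them with CONCAT_VECTORS; legalisation then handles each half
// directly.
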
14091 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
14092 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
14093 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
14094  const ARMSubtarget *ST) {
14095  SDValue N0 = N->getOperand(0);
14096 
14097  // Check for sign- and zero-extensions of vector extract operations of 8- and
14098  // 16-bit vector elements. NEON and MVE support these directly. They are
14099  // handled during DAG combining because type legalization will promote them
14100  // to 32-bit types and it is messy to recognize the operations after that.
14101  if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
14102  N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14103  SDValue Vec = N0.getOperand(0);
14104  SDValue Lane = N0.getOperand(1);
14105  EVT VT = N->getValueType(0);
14106  EVT EltVT = N0.getValueType();
14107  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14108 
14109  if (VT == MVT::i32 &&
14110  (EltVT == MVT::i8 || EltVT == MVT::i16) &&
14111  TLI.isTypeLegal(Vec.getValueType()) &&
14112  isa<ConstantSDNode>(Lane)) {
14113 
14114  unsigned Opc = 0;
14115  switch (N->getOpcode()) {
14116  default: llvm_unreachable("unexpected opcode");
14117  case ISD::SIGN_EXTEND:
14118  Opc = ARMISD::VGETLANEs;
14119  break;
14120  case ISD::ZERO_EXTEND:
14121  case ISD::ANY_EXTEND:
14122  Opc = ARMISD::VGETLANEu;
14123  break;
14124  }
14125  return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
14126  }
14127  }
14128 
14129  if (ST->hasMVEIntegerOps())
14130  if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
14131  return NewLoad;
14132 
14133  return SDValue();
14134 }
14135 
14136 static const APInt *isPowerOf2Constant(SDValue V) {
14137  const auto *C = dyn_cast<ConstantSDNode>(V);
14138  if (!C)
14139  return nullptr;
14140  const APInt *CV = &C->getAPIntValue();
14141  return CV->isPowerOf2() ? CV : nullptr;
14142 }
14143 
14144 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
14145  // If we have a CMOV, OR and AND combination such as:
14146  // if (x & CN)
14147  // y |= CM;
14148  //
14149  // And:
14150  // * CN is a single bit;
14151  // * All bits covered by CM are known zero in y
14152  //
14153  // Then we can convert this into a sequence of BFI instructions. This will
14154  // always be a win if CM is a single bit, will always be no worse than the
14155  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
14156  // three bits (due to the extra IT instruction).
14157 
14158  SDValue Op0 = CMOV->getOperand(0);
14159  SDValue Op1 = CMOV->getOperand(1);
14160  auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
14161  auto CC = CCNode->getAPIntValue().getLimitedValue();
14162  SDValue CmpZ = CMOV->getOperand(4);
14163 
14164  // The compare must be against zero.
14165  if (!isNullConstant(CmpZ->getOperand(1)))
14166  return SDValue();
14167 
14168  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
14169  SDValue And = CmpZ->getOperand(0);
14170  if (And->getOpcode() != ISD::AND)
14171  return SDValue();
14172  const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
14173  if (!AndC)
14174  return SDValue();
14175  SDValue X = And->getOperand(0);
14176 
14177  if (CC == ARMCC::EQ) {
14178  // We're performing an "equal to zero" compare. Swap the operands so we
14179  // canonicalize on a "not equal to zero" compare.
14180  std::swap(Op0, Op1);
14181  } else {
14182  assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
14183  }
14184 
14185  if (Op1->getOpcode() != ISD::OR)
14186  return SDValue();
14187 
14188  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
14189  if (!OrC)
14190  return SDValue();
14191  SDValue Y = Op1->getOperand(0);
14192 
14193  if (Op0 != Y)
14194  return SDValue();
14195 
14196  // Now, is it profitable to continue?
14197  APInt OrCI = OrC->getAPIntValue();
14198  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
14199  if (OrCI.countPopulation() > Heuristic)
14200  return SDValue();
14201 
14202  // Lastly, can we determine that the bits defined by OrCI
14203  // are zero in Y?
14204  KnownBits Known = DAG.computeKnownBits(Y);
14205  if ((OrCI & Known.Zero) != OrCI)
14206  return SDValue();
14207 
14208  // OK, we can do the combine.
14209  SDValue V = Y;
14210  SDLoc dl(X);
14211  EVT VT = X.getValueType();
14212  unsigned BitInX = AndC->logBase2();
14213 
14214  if (BitInX != 0) {
14215  // We must shift X first.
14216  X = DAG.getNode(ISD::SRL, dl, VT, X,
14217  DAG.getConstant(BitInX, dl, VT));
14218  }
14219 
14220  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
14221  BitInY < NumActiveBits; ++BitInY) {
14222  if (OrCI[BitInY] == 0)
14223  continue;
14224  APInt Mask(VT.getSizeInBits(), 0);
14225  Mask.setBit(BitInY);
14226  V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
14227  // Confusingly, the operand is an *inverted* mask.
14228  DAG.getConstant(~Mask, dl, VT));
14229  }
14230 
14231  return V;
14232 }
14233 
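// Example (illustrative; the variables are hypothetical): the BFI conversion
// above targets source of the form
//   if (x & 0x4)    // CN = 4, a single bit
//     y |= 0x30;    // CM = 0x30, two bits, known zero in y
// which becomes a right shift of x by 2 followed by one BFI per set bit of
// CM, avoiding the TST/OR (plus IT on Thumb) sequence.
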
14234 // Given N, the value controlling the conditional branch, search for the loop
14235 // intrinsic, returning it, along with how the value is used. We need to handle
14236 // patterns such as the following:
14237 // (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
14238 // (brcond (setcc (loop.decrement), 0, eq), exit)
14239 // (brcond (setcc (loop.decrement), 0, ne), header)
14240 static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
14241  bool &Negate) {
14242  switch (N->getOpcode()) {
14243  default:
14244  break;
14245  case ISD::XOR: {
14246  if (!isa<ConstantSDNode>(N.getOperand(1)))
14247  return SDValue();
14248  if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
14249  return SDValue();
14250  Negate = !Negate;
14251  return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
14252  }
14253  case ISD::SETCC: {
14254  auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
14255  if (!Const)
14256  return SDValue();
14257  if (Const->isNullValue())
14258  Imm = 0;
14259  else if (Const->isOne())
14260  Imm = 1;
14261  else
14262  return SDValue();
14263  CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
14264  return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
14265  }
14266  case ISD::INTRINSIC_W_CHAIN: {
14267  unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
14268  if (IntOp != Intrinsic::test_set_loop_iterations &&
14269  IntOp != Intrinsic::loop_decrement_reg)
14270  return SDValue();
14271  return N;
14272  }
14273  }
14274  return SDValue();
14275 }
14276 
14277 static SDValue PerformHWLoopCombine(SDNode *N,
14278  TargetLowering::DAGCombinerInfo &DCI,
14279  const ARMSubtarget *ST) {
14280 
14281  // The hwloop intrinsics that we're interested in are used for control flow,
14282  // either for entering or exiting the loop:
14283  // - test.set.loop.iterations will test whether its operand is zero. If it
14284  // is zero, the following branch should not enter the loop.
14285  // - loop.decrement.reg also tests whether its operand is zero. If it is
14286  // zero, the following branch should not branch back to the beginning of
14287  // the loop.
14288  // So here, we need to check how the brcond uses the result of each of the
14289  // intrinsics to ensure that we're branching to the right place at the
14290  // right time.
14291 
14292  ISD::CondCode CC;
14293  SDValue Cond;
14294  int Imm = 1;
14295  bool Negate = false;
14296  SDValue Chain = N->getOperand(0);
14297  SDValue Dest;
14298 
14299  if (N->getOpcode() == ISD::BRCOND) {
14300  CC = ISD::SETEQ;
14301  Cond = N->getOperand(1);
14302  Dest = N->getOperand(2);
14303  } else {
14304  assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
14305  CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
14306  Cond = N->getOperand(2);
14307  Dest = N->getOperand(4);
14308  if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
14309  if (!Const->isOne() && !Const->isNullValue())
14310  return SDValue();
14311  Imm = Const->getZExtValue();
14312  } else
14313  return SDValue();
14314  }
14315 
14316  SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
14317  if (!Int)
14318  return SDValue();
14319 
14320  if (Negate)
14321  CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
14322 
14323  auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
14324  return (CC == ISD::SETEQ && Imm == 0) ||
14325  (CC == ISD::SETNE && Imm == 1) ||
14326  (CC == ISD::SETLT && Imm == 1) ||
14327  (CC == ISD::SETULT && Imm == 1);
14328  };
14329 
14330  auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
14331  return (CC == ISD::SETEQ && Imm == 1) ||
14332  (CC == ISD::SETNE && Imm == 0) ||
14333  (CC == ISD::SETGT && Imm == 0) ||
14334  (CC == ISD::SETUGT && Imm == 0) ||
14335  (CC == ISD::SETGE && Imm == 1) ||
14336  (CC == ISD::SETUGE && Imm == 1);
14337  };
14338 
14339  assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
14340  "unsupported condition");
14341 
14342  SDLoc dl(Int);
14343  SelectionDAG &DAG = DCI.DAG;
14344  SDValue Elements = Int.getOperand(2);
14345  unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
14346  assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
14347  && "expected single br user");
14348  SDNode *Br = *N->use_begin();
14349  SDValue OtherTarget = Br->getOperand(1);
14350 
14351  // Update the unconditional branch to branch to the given Dest.
14352  auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
14353  SDValue NewBrOps[] = { Br->getOperand(0), Dest };
14354  SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
14355  DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
14356  };
14357 
14358  if (IntOp == Intrinsic::test_set_loop_iterations) {
14359  SDValue Res;
14360  // We expect this 'instruction' to branch when the counter is zero.
14361  if (IsTrueIfZero(CC, Imm)) {
14362  SDValue Ops[] = { Chain, Elements, Dest };
14363  Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
14364  } else {
14365  // The logic is the reverse of what we need for WLS, so find the other
14366  // basic block target: the target of the following br.
14367  UpdateUncondBr(Br, Dest, DAG);
14368 
14369  SDValue Ops[] = { Chain, Elements, OtherTarget };
14370  Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
14371  }
14372  DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
14373  return Res;
14374  } else {
14375  SDValue Size = DAG.getTargetConstant(
14376  cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
14377  SDValue Args[] = { Int.getOperand(0), Elements, Size, };
14378  SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
14379  DAG.getVTList(MVT::i32, MVT::Other), Args);
14380  DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
14381 
14382  // We expect this instruction to branch when the count is not zero.
14383  SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
14384 
14385  // Update the unconditional branch to target the loop preheader if we've
14386  // found the condition has been reversed.
14387  if (Target == OtherTarget)
14388  UpdateUncondBr(Br, Dest, DAG);
14389 
14390  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14391  SDValue(LoopDec.getNode(), 1), Chain);
14392 
14393  SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
14394  return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
14395  }
14396  return SDValue();
14397 }
14398 
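// Example (illustrative): for a loop guarded by
//   (brcond (setcc (test.set.loop.iterations %n), 0, eq), %exit)
// IsTrueIfZero holds, so the code above emits ARMISD::WLS targeting %exit
// directly, i.e. a single "wls lr, rN, .Lexit" that skips the loop body when
// the trip count is zero.
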
14399 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
14400 SDValue
14401 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
14402  SDValue Cmp = N->getOperand(4);
14403  if (Cmp.getOpcode() != ARMISD::CMPZ)
14404  // Only looking at NE cases.
14405  return SDValue();
14406 
14407  EVT VT = N->getValueType(0);
14408  SDLoc dl(N);
14409  SDValue LHS = Cmp.getOperand(0);
14410  SDValue RHS = Cmp.getOperand(1);
14411  SDValue Chain = N->getOperand(0);
14412  SDValue BB = N->getOperand(1);
14413  SDValue ARMcc = N->getOperand(2);
14414  ARMCC::CondCodes CC =
14415  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
14416 
14417  // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
14418  // -> (brcond Chain BB CC CPSR Cmp)
14419  if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
14420  LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
14421  LHS->getOperand(0)->hasOneUse()) {
14422  auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
14423  auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
14424  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
14425  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
14426  if ((LHS00C && LHS00C->getZExtValue() == 0) &&
14427  (LHS01C && LHS01C->getZExtValue() == 1) &&
14428  (LHS1C && LHS1C->getZExtValue() == 1) &&
14429  (RHSC && RHSC->getZExtValue() == 0)) {
14430  return DAG.getNode(
14431  ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
14432  LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
14433  }
14434  }
14435 
14436  return SDValue();
14437 }
14438 
14439 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
14440 SDValue
14441 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
14442  SDValue Cmp = N->getOperand(4);
14443  if (Cmp.getOpcode() != ARMISD::CMPZ)
14444  // Only looking at EQ and NE cases.
14445  return SDValue();
14446 
14447  EVT VT = N->getValueType(0);
14448  SDLoc dl(N);
14449  SDValue LHS = Cmp.getOperand(0);
14450  SDValue RHS = Cmp.getOperand(1);
14451  SDValue FalseVal = N->getOperand(0);
14452  SDValue TrueVal = N->getOperand(1);
14453  SDValue ARMcc = N->getOperand(2);
14454  ARMCC::CondCodes CC =
14455  (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
14456 
14457  // BFI is only available on V6T2+.
14458  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
14459  SDValue R = PerformCMOVToBFICombine(N, DAG);
14460  if (R)
14461  return R;
14462  }
14463 
14464  // Simplify
14465  // mov r1, r0
14466  // cmp r1, x
14467  // mov r0, y
14468  // moveq r0, x
14469  // to
14470  // cmp r0, x
14471  // movne r0, y
14472  //
14473  // mov r1, r0
14474  // cmp r1, x
14475  // mov r0, x
14476  // movne r0, y
14477  // to
14478  // cmp r0, x
14479  // movne r0, y
14480  /// FIXME: Turn this into a target neutral optimization?
14481  SDValue Res;
14482  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
14483  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
14484  N->getOperand(3), Cmp);
14485  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
14486  SDValue ARMcc;
14487  SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
14488  Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
14489  N->getOperand(3), NewCmp);
14490  }
14491 
14492  // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
14493  // -> (cmov F T CC CPSR Cmp)
14494  if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
14495  auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
14496  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
14497  auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
14498  if ((LHS0C && LHS0C->getZExtValue() == 0) &&
14499  (LHS1C && LHS1C->getZExtValue() == 1) &&
14500  (RHSC && RHSC->getZExtValue() == 0)) {
14501  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
14502  LHS->getOperand(2), LHS->getOperand(3),
14503  LHS->getOperand(4));
14504  }
14505  }
14506 
14507  if (!VT.isInteger())
14508  return SDValue();
14509 
14510  // Materialize a boolean comparison for integers so we can avoid branching.
14511  if (isNullConstant(FalseVal)) {
14512  if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
14513  if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
14514  // If x == y then x - y == 0, and ARM's CLZ will return 32; shifting that
14515  // right by 5 bits yields 1, otherwise it yields 0.
14516  // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
14517  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
14518  Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
14519  DAG.getConstant(5, dl, MVT::i32));
14520  } else {
14521  // CMOV 0, 1, ==, (CMPZ x, y) ->
14522  // (ADDCARRY (SUB x, y), t:0, t:1)
14523  // where t = (SUBCARRY 0, (SUB x, y), 0)
14524  //
14525  // The SUBCARRY computes 0 - (x - y) and this will give a borrow when
14526  // x != y. In other words, a carry C == 1 when x == y, C == 0
14527  // otherwise.
14528  // The final ADDCARRY computes
14529  // x - y + (0 - (x - y)) + C == C
14530  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
14531  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
14532  SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
14533  // ISD::SUBCARRY returns a borrow, but we actually want the carry
14534  // here.
14535  SDValue Carry =
14536  DAG.getNode(ISD::SUB, dl, MVT::i32,
14537  DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
14538  Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
14539  }
14540  } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
14541  (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
14542  // This seems pointless but will allow us to combine it further below.
14543  // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
14544  SDValue Sub =
14545  DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
14546  SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
14547  Sub.getValue(1), SDValue());
14548  Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
14549  N->getOperand(3), CPSRGlue.getValue(1));
14550  FalseVal = Sub;
14551  }
14552  } else if (isNullConstant(TrueVal)) {
14553  if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
14554  (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
14555  // This seems pointless but will allow us to combine it further below
14556  // Note that we change == for != as this is the dual for the case above.
14557  // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
14558  SDValue Sub =
14559  DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
14560  SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
14561  Sub.getValue(1), SDValue());
14562  Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
14563  DAG.getConstant(ARMCC::NE, dl, MVT::i32),
14564  N->getOperand(3), CPSRGlue.getValue(1));
14565  FalseVal = Sub;
14566  }
14567  }
14568 
14569  // On Thumb1, the DAG above may be further combined if z is a power of 2
14570  // (z == 2 ^ K).
14571  // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
14572  // t1 = (USUBO (SUB x, y), 1)
14573  // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
14574  // Result = if K != 0 then (SHL t2:0, K) else t2:0
14575  //
14576  // This also handles the special case of comparing against zero; it's
14577  // essentially, the same pattern, except there's no SUBS:
14578  // CMOV x, z, !=, (CMPZ x, 0) ->
14579  // t1 = (USUBO x, 1)
14580  // t2 = (SUBCARRY x, t1:0, t1:1)
14581  // Result = if K != 0 then (SHL t2:0, K) else t2:0
14582  const APInt *TrueConst;
14583  if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
14584  ((FalseVal.getOpcode() == ARMISD::SUBS &&
14585  FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
14586  (FalseVal == LHS && isNullConstant(RHS))) &&
14587  (TrueConst = isPowerOf2Constant(TrueVal))) {
14588  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
14589  unsigned ShiftAmount = TrueConst->logBase2();
14590  if (ShiftAmount)
14591  TrueVal = DAG.getConstant(1, dl, VT);
14592  SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
14593  Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
14594 
14595  if (ShiftAmount)
14596  Res = DAG.getNode(ISD::SHL, dl, VT, Res,
14597  DAG.getConstant(ShiftAmount, dl, MVT::i32));
14598  }
14599 
14600  if (Res.getNode()) {
14601  KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
14602  // Capture demanded bits information that would be otherwise lost.
14603  if (Known.Zero == 0xfffffffe)
14604  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
14605  DAG.getValueType(MVT::i1));
14606  else if (Known.Zero == 0xffffff00)
14607  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
14608  DAG.getValueType(MVT::i8));
14609  else if (Known.Zero == 0xffff0000)
14610  Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
14611  DAG.getValueType(MVT::i16));
14612  }
14613 
14614  return Res;
14615 }
14616 
14617 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
14618  DAGCombinerInfo &DCI) const {
14619  switch (N->getOpcode()) {
14620  default: break;
14621  case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
14622  case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
14623  case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
14624  case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
14625  case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
14626  case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
14627  case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
14628  case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
14629  case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
14630  case ISD::BRCOND:
14631  case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
14632  case ARMISD::ADDC:
14633  case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
14634  case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
14635  case ARMISD::BFI: return PerformBFICombine(N, DCI);
14636  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
14637  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
14638  case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
14639  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
14640  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
14641  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
14642  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
14643  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
14644  case ISD::FP_TO_SINT:
14645  case ISD::FP_TO_UINT:
14646  return PerformVCVTCombine(N, DCI.DAG, Subtarget);
14647  case ISD::FDIV:
14648  return PerformVDIVCombine(N, DCI.DAG, Subtarget);
14649  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
14650  case ISD::SHL:
14651  case ISD::SRA:
14652  case ISD::SRL:
14653  return PerformShiftCombine(N, DCI, Subtarget);
14654  case ISD::SIGN_EXTEND:
14655  case ISD::ZERO_EXTEND:
14656  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
14657  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
14658  case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
14659  case ISD::LOAD: return PerformLOADCombine(N, DCI);
14660  case ARMISD::VLD1DUP:
14661  case ARMISD::VLD2DUP:
14662  case ARMISD::VLD3DUP:
14663  case ARMISD::VLD4DUP:
14664  return PerformVLDCombine(N, DCI);
14665  case ARMISD::BUILD_VECTOR:
14666  return PerformARMBUILD_VECTORCombine(N, DCI);
14667  case ARMISD::PREDICATE_CAST:
14668  return PerformPREDICATE_CASTCombine(N, DCI);
14669  case ARMISD::VCMP:
14670  return PerformVCMPCombine(N, DCI, Subtarget);
14671  case ARMISD::SMULWB: {
14672  unsigned BitWidth = N->getValueType(0).getSizeInBits();
14673  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
14674  if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
14675  return SDValue();
14676  break;
14677  }
14678  case ARMISD::SMULWT: {
14679  unsigned BitWidth = N->getValueType(0).getSizeInBits();
14680  APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
14681  if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
14682  return SDValue();
14683  break;
14684  }
14685  case ARMISD::SMLALBB:
14686  case ARMISD::QADD16b:
14687  case ARMISD::QSUB16b: {
14688  unsigned BitWidth = N->getValueType(0).getSizeInBits();
14689  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
14690  if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
14691  (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
14692  return SDValue();
14693  break;
14694  }
14695  case ARMISD::SMLALBT: {
14696  unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
14697  APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
14698  unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
14699  APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
14700  if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
14701  (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
14702  return SDValue();
14703  break;
14704  }
14705  case ARMISD::SMLALTB: {
14706  unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
14707  APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
14708  unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
14709  APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
14710  if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
14711  (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
14712  return SDValue();
14713  break;
14714  }
14715  case ARMISD::SMLALTT: {
14716  unsigned BitWidth = N->getValueType(0).getSizeInBits();
14717  APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
14718  if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
14719  (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
14720  return SDValue();
14721  break;
14722  }
14723  case ARMISD::QADD8b:
14724  case ARMISD::QSUB8b: {
14725  unsigned BitWidth = N->getValueType(0).getSizeInBits();
14726  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
14727  if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
14728  (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
14729  return SDValue();
14730  break;
14731  }
14732  case ISD::INTRINSIC_VOID:
14733  case ISD::INTRINSIC_W_CHAIN:
14734  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14735  case Intrinsic::arm_neon_vld1:
14736  case Intrinsic::arm_neon_vld1x2:
14737  case Intrinsic::arm_neon_vld1x3:
14738  case Intrinsic::arm_neon_vld1x4:
14739  case Intrinsic::arm_neon_vld2:
14740  case Intrinsic::arm_neon_vld3:
14741  case Intrinsic::arm_neon_vld4:
14742  case Intrinsic::arm_neon_vld2lane:
14743  case Intrinsic::arm_neon_vld3lane:
14744  case Intrinsic::arm_neon_vld4lane:
14745  case Intrinsic::arm_neon_vld2dup:
14746  case Intrinsic::arm_neon_vld3dup:
14747  case Intrinsic::arm_neon_vld4dup:
14748  case Intrinsic::arm_neon_vst1:
14749  case Intrinsic::arm_neon_vst1x2:
14750  case Intrinsic::arm_neon_vst1x3:
14751  case Intrinsic::arm_neon_vst1x4:
14752  case Intrinsic::arm_neon_vst2:
14753  case Intrinsic::arm_neon_vst3:
14754  case Intrinsic::arm_neon_vst4:
14755  case Intrinsic::arm_neon_vst2lane:
14756  case Intrinsic::arm_neon_vst3lane:
14757  case Intrinsic::arm_neon_vst4lane:
14758  return PerformVLDCombine(N, DCI);
14759  default: break;
14760  }
14761  break;
14762  }
14763  return SDValue();
14764 }
14765 
14766 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
14767  EVT VT) const {
14768  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
14769 }
14770 
14771 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
14772  unsigned Alignment,
14773  MachineMemOperand::Flags,
14774  bool *Fast) const {
14775  // Depends what it gets converted into if the type is weird.
14776  if (!VT.isSimple())
14777  return false;
14778 
14779  // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
14780  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
14781  auto Ty = VT.getSimpleVT().SimpleTy;
14782 
14783  if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
14784  // Unaligned access can use (for example) LDRB, LDRH, LDR
14785  if (AllowsUnaligned) {
14786  if (Fast)
14787  *Fast = Subtarget->hasV7Ops();
14788  return true;
14789  }
14790  }
14791 
14792  if (Ty == MVT::f64 || Ty == MVT::v2f64) {
14793  // For any little-endian targets with neon, we can support unaligned ld/st
14794  // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
14795  // A big-endian target may also explicitly support unaligned accesses
14796  if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
14797  if (Fast)
14798  *Fast = true;
14799  return true;
14800  }
14801  }
14802 
14803  if (!Subtarget->hasMVEIntegerOps())
14804  return false;
14805 
14806  // These are for predicates
14807  if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) {
14808  if (Fast)
14809  *Fast = true;
14810  return true;
14811  }
14812 
14813  // These are for truncated stores/narrowing loads. They are fine so long as
14814  // the alignment is at least the size of the item being loaded
14815  if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
14816  Alignment >= VT.getScalarSizeInBits() / 8) {
14817  if (Fast)
14818  *Fast = true;
14819  return true;
14820  }
14821 
14822  // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
14823  // VSTRW.U32 all store the vector register in exactly the same format, and
14824  // differ only in the range of their immediate offset field and the required
14825  // alignment. So there is always a store that can be used, regardless of
14826  // actual type.
14827  //
14828  // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
14829  // VREV64.8) pair to get the same effect. This will likely be better than
14830  // aligning the vector through the stack.
14831  if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
14832  Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
14833  Ty == MVT::v2f64) {
14834  if (Fast)
14835  *Fast = true;
14836  return true;
14837  }
14838 
14839  return false;
14840 }
14841 
14842 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
14843  unsigned AlignCheck) {
14844  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
14845  (DstAlign == 0 || DstAlign % AlignCheck == 0));
14846 }
14847 
14848 EVT ARMTargetLowering::getOptimalMemOpType(
14849  uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
14850  bool ZeroMemset, bool MemcpyStrSrc,
14851  const AttributeList &FuncAttributes) const {
14852  // See if we can use NEON instructions for this...
14853  if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
14854  !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
14855  bool Fast;
14856  if (Size >= 16 &&
14857  (memOpAlign(SrcAlign, DstAlign, 16) ||
14858  (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
14859  MachineMemOperand::MONone, &Fast) &&
14860  Fast))) {
14861  return MVT::v2f64;
14862  } else if (Size >= 8 &&
14863  (memOpAlign(SrcAlign, DstAlign, 8) ||
14864  (allowsMisalignedMemoryAccesses(
14865  MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
14866  Fast))) {
14867  return MVT::f64;
14868  }
14869  }
14870 
14871  // Let the target-independent logic figure it out.
14872  return MVT::Other;
14873 }
14874 
14875 // 64-bit integers are split into their high and low parts and held in two
14876 // different registers, so the trunc is free since the low register can just
14877 // be used.
14878 bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
14879  if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
14880  return false;
14881  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
14882  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
14883  return (SrcBits == 64 && DestBits == 32);
14884 }
14885 
14886 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
14887  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
14888  !DstVT.isInteger())
14889  return false;
14890  unsigned SrcBits = SrcVT.getSizeInBits();
14891  unsigned DestBits = DstVT.getSizeInBits();
14892  return (SrcBits == 64 && DestBits == 32);
14893 }
14894 
14895 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
14896  if (Val.getOpcode() != ISD::LOAD)
14897  return false;
14898 
14899  EVT VT1 = Val.getValueType();
14900  if (!VT1.isSimple() || !VT1.isInteger() ||
14901  !VT2.isSimple() || !VT2.isInteger())
14902  return false;
14903 
14904  switch (VT1.getSimpleVT().SimpleTy) {
14905  default: break;
14906  case MVT::i1:
14907  case MVT::i8:
14908  case MVT::i16:
14909  // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
14910  return true;
14911  }
14912 
14913  return false;
14914 }
14915 
14916 bool ARMTargetLowering::isFNegFree(EVT VT) const {
14917  if (!VT.isSimple())
14918  return false;
14919 
14920  // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
14921  // negate values directly (fneg is free). So, we don't want to let the DAG
14922  // combiner rewrite fneg into xors and some other instructions. For f16 and
14923  // FullFP16 argument passing, some bitcast nodes may be introduced,
14924  // triggering this DAG combine rewrite, so we are avoiding that with this.
14925  switch (VT.getSimpleVT().SimpleTy) {
14926  default: break;
14927  case MVT::f16:
14928  return Subtarget->hasFullFP16();
14929  }
14930 
14931  return false;
14932 }
14933 
14934 /// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
14935 /// of the vector elements.
14936 static bool areExtractExts(Value *Ext1, Value *Ext2) {
14937  auto areExtDoubled = [](Instruction *Ext) {
14938  return Ext->getType()->getScalarSizeInBits() ==
14939  2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
14940  };
14941 
14942  if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
14943  !match(Ext2, m_ZExtOrSExt(m_Value())) ||
14944  !areExtDoubled(cast<Instruction>(Ext1)) ||
14945  !areExtDoubled(cast<Instruction>(Ext2)))
14946  return false;
14947 
14948  return true;
14949 }
14950 
14951 /// Check if sinking \p I's operands to I's basic block is profitable, because
14952 /// the operands can be folded into a target instruction, e.g.
14953 /// sext/zext can be folded into vsubl.
14954 bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
14955  SmallVectorImpl<Use *> &Ops) const {
14956  if (!I->getType()->isVectorTy())
14957  return false;
14958 
14959  if (Subtarget->hasNEON()) {
14960  switch (I->getOpcode()) {
14961  case Instruction::Sub:
14962  case Instruction::Add: {
14963  if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
14964  return false;
14965  Ops.push_back(&I->getOperandUse(0));
14966  Ops.push_back(&I->getOperandUse(1));
14967  return true;
14968  }
14969  default:
14970  return false;
14971  }
14972  }
14973 
14974  if (!Subtarget->hasMVEIntegerOps())
14975  return false;
14976 
14977  auto IsSinker = [](Instruction *I, int Operand) {
14978  switch (I->getOpcode()) {
14979  case Instruction::Add:
14980  case Instruction::Mul:
14981  case Instruction::ICmp:
14982  return true;
14983  case Instruction::Sub:
14984  case Instruction::Shl:
14985  case Instruction::LShr:
14986  case Instruction::AShr:
14987  return Operand == 1;
14988  default:
14989  return false;
14990  }
14991  };
14992 
14993  int Op = 0;
14994  if (!isa<ShuffleVectorInst>(I->getOperand(Op)))
14995  Op = 1;
14996  if (!IsSinker(I, Op))
14997  return false;
14998  if (!match(I->getOperand(Op),
14999  m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()),
15000  m_Undef(), m_Zero()))) {
15001  return false;
15002  }
15003  Instruction *Shuffle = cast<Instruction>(I->getOperand(Op));
15004  // All uses of the shuffle should be sunk to avoid duplicating it across gpr
15005  // and vector registers
15006  for (Use &U : Shuffle->uses()) {
15007  Instruction *Insn = cast<Instruction>(U.getUser());
15008  if (!IsSinker(Insn, U.getOperandNo()))
15009  return false;
15010  }
15011  Ops.push_back(&Shuffle->getOperandUse(0));
15012  Ops.push_back(&I->getOperandUse(Op));
15013  return true;
15014 }
15015 
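// Example (illustrative; the IR names are hypothetical): for MVE, a splatted
// operand such as
//   %s = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   %m = mul <4 x i32> %a, %s
// is sunk next to each qualifying user so instruction selection can use the
// vector-by-scalar form (e.g. vmul.i32 q0, q1, r0) instead of keeping the
// splat alive in a vector register.
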
15016 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
15017  EVT VT = ExtVal.getValueType();
15018 
15019  if (!isTypeLegal(VT))
15020  return false;
15021 
15022  if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
15023  if (Ld->isExpandingLoad())
15024  return false;
15025  }
15026 
15027  // Don't create a loadext if we can fold the extension into a wide/long
15028  // instruction.
15029  // If there's more than one user instruction, the loadext is desirable no
15030  // matter what. There can be two uses by the same instruction.
15031  if (ExtVal->use_empty() ||
15032  !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
15033  return true;
15034 
15035  SDNode *U = *ExtVal->use_begin();
15036  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
15037  U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
15038  return false;
15039 
15040  return true;
15041 }
15042 
15043 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
15044  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15045  return false;
15046 
15047  if (!isTypeLegal(EVT::getEVT(Ty1)))
15048  return false;
15049 
15050  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
15051 
15052  // Assuming the caller doesn't have a zeroext or signext return parameter,
15053  // truncation all the way down to i1 is valid.
15054  return true;
15055 }
15056 
15057 int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
15058  const AddrMode &AM, Type *Ty,
15059  unsigned AS) const {
15060  if (isLegalAddressingMode(DL, AM, Ty, AS)) {
15061  if (Subtarget->hasFPAO())
15062  return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
15063  return 0;
15064  }
15065  return -1;
15066 }
15067 
15068 /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
15069 /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
15070 /// expanded to FMAs when this method returns true, otherwise fmuladd is
15071 /// expanded to fmul + fadd.
15072 ///
15073 /// ARM supports both fused and unfused multiply-add operations; we already
15074 /// lower a pair of fmul and fadd to the latter so it's not clear that there
15075 /// would be a gain or that the gain would be worthwhile enough to risk
15076 /// correctness bugs.
15077 ///
15078 /// For MVE, we set this to true as it helps simplify the need for some
15079 /// patterns (and we don't have the non-fused floating point instruction).
15080 bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
15081  EVT VT) const {
15082  if (!VT.isSimple())
15083  return false;
15084 
15085  switch (VT.getSimpleVT().SimpleTy) {
15086  case MVT::v4f32:
15087  case MVT::v8f16:
15088  return Subtarget->hasMVEFloatOps();
15089  case MVT::f16:
15090  return Subtarget->useFPVFMx16();
15091  case MVT::f32:
15092  return Subtarget->useFPVFMx();
15093  case MVT::f64:
15094  return Subtarget->useFPVFMx64();
15095  default:
15096  break;
15097  }
15098 
15099  return false;
15100 }
15101 
15102 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
15103  if (V < 0)
15104  return false;
15105 
15106  unsigned Scale = 1;
15107  switch (VT.getSimpleVT().SimpleTy) {
15108  case MVT::i1:
15109  case MVT::i8:
15110  // Scale == 1;
15111  break;
15112  case MVT::i16:
15113  // Scale == 2;
15114  Scale = 2;
15115  break;
15116  default:
15117  // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
15118  // Scale == 4;
15119  Scale = 4;
15120  break;
15121  }
15122 
15123  if ((V & (Scale - 1)) != 0)
15124  return false;
15125  return isUInt<5>(V / Scale);
15126 }
15127 
15128 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
15129  const ARMSubtarget *Subtarget) {
15130  if (!VT.isInteger() && !VT.isFloatingPoint())
15131  return false;
15132  if (VT.isVector() && Subtarget->hasNEON())
15133  return false;
15134  if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
15135  !Subtarget->hasMVEFloatOps())
15136  return false;
15137 
15138  bool IsNeg = false;
15139  if (V < 0) {
15140  IsNeg = true;
15141  V = -V;
15142  }
15143 
15144  unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
15145 
15146  // MVE: size * imm7
15147  if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
15148  switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
15149  case MVT::i32:
15150  case MVT::f32:
15151  return isShiftedUInt<7,2>(V);
15152  case MVT::i16:
15153  case MVT::f16:
15154  return isShiftedUInt<7,1>(V);
15155  case MVT::i8:
15156  return isUInt<7>(V);
15157  default:
15158  return false;
15159  }
15160  }
15161 
15162  // half VLDR: 2 * imm8
15163  if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
15164  return isShiftedUInt<8, 1>(V);
15165  // VLDR and LDRD: 4 * imm8
15166  if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
15167  return isShiftedUInt<8, 2>(V);
15168 
15169  if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
15170  // + imm12 or - imm8
15171  if (IsNeg)
15172  return isUInt<8>(V);
15173  return isUInt<12>(V);
15174  }
15175 
15176  return false;
15177 }
15178 
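// Example (illustrative): under the rules above, a Thumb2 scalar i32 access
// allows offsets up to #4095 (+imm12) but only down to #-255 (-imm8), while
// an MVE v4i32 access requires a multiple of 4 no larger than #508
// (size * imm7).
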
15179 /// isLegalAddressImmediate - Return true if the integer value can be used
15180 /// as the offset of the target addressing mode for load / store of the
15181 /// given type.
15182 static bool isLegalAddressImmediate(int64_t V, EVT VT,
15183  const ARMSubtarget *Subtarget) {
15184  if (V == 0)
15185  return true;
15186 
15187  if (!VT.isSimple())
15188  return false;
15189 
15190  if (Subtarget->isThumb1Only())
15191  return isLegalT1AddressImmediate(V, VT);
15192  else if (Subtarget->isThumb2())
15193  return isLegalT2AddressImmediate(V, VT, Subtarget);
15194 
15195  // ARM mode.
15196  if (V < 0)
15197  V = - V;
15198  switch (VT.getSimpleVT().SimpleTy) {
15199  default: return false;
15200  case MVT::i1:
15201  case MVT::i8:
15202  case MVT::i32:
15203  // +- imm12
15204  return isUInt<12>(V);
15205  case MVT::i16:
15206  // +- imm8
15207  return isUInt<8>(V);
15208  case MVT::f32:
15209  case MVT::f64:
15210  if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
15211  return false;
15212  return isShiftedUInt<8, 2>(V);
15213  }
15214 }
15215 
15216 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
15217  EVT VT) const {
15218  int Scale = AM.Scale;
15219  if (Scale < 0)
15220  return false;
15221 
15222  switch (VT.getSimpleVT().SimpleTy) {
15223  default: return false;
15224  case MVT::i1:
15225  case MVT::i8:
15226  case MVT::i16:
15227  case MVT::i32:
15228  if (Scale == 1)
15229  return true;
15230  // r + r << imm
15231  Scale = Scale & ~1;
15232  return Scale == 2 || Scale == 4 || Scale == 8;
15233  case MVT::i64:
15234  // FIXME: What are we trying to model here? ldrd doesn't have an r + r
15235  // version in Thumb mode.
15236  // r + r
15237  if (Scale == 1)
15238  return true;
15239  // r * 2 (this can be lowered to r + r).
15240  if (!AM.HasBaseReg && Scale == 2)
15241  return true;
15242  return false;
15243  case MVT::isVoid:
15244  // Note, we allow "void" uses (basically, uses that aren't loads or
15245  // stores), because arm allows folding a scale into many arithmetic
15246  // operations. This should be made more precise and revisited later.
15247 
15248  // Allow r << imm, but the imm has to be a multiple of two.
15249  if (Scale & 1) return false;
15250  return isPowerOf2_32(Scale);
15251  }
15252 }
15253 
15255  EVT VT) const {
15256  const int Scale = AM.Scale;
15257 
15258  // Negative scales are not supported in Thumb1.
15259  if (Scale < 0)
15260  return false;
15261 
15262  // Thumb1 addressing modes do not support register scaling excepting the
15263  // following cases:
15264  // 1. Scale == 1 means no scaling.
15265  // 2. Scale == 2 this can be lowered to r + r if there is no base register.
15266  return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
15267 }
15268 
15269 /// isLegalAddressingMode - Return true if the addressing mode represented
15270 /// by AM is legal for this target, for a load/store of the specified type.
15271 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15272  const AddrMode &AM, Type *Ty,
15273  unsigned AS, Instruction *I) const {
15274  EVT VT = getValueType(DL, Ty, true);
15275  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
15276  return false;
15277 
15278  // Can never fold addr of global into load/store.
15279  if (AM.BaseGV)
15280  return false;
15281 
15282  switch (AM.Scale) {
15283  case 0: // no scale reg, must be "r+i" or "r", or "i".
15284  break;
15285  default:
15286  // ARM doesn't support any R+R*scale+imm addr modes.
15287  if (AM.BaseOffs)
15288  return false;
15289 
15290  if (!VT.isSimple())
15291  return false;
15292 
15293  if (Subtarget->isThumb1Only())
15294  return isLegalT1ScaledAddressingMode(AM, VT);
15295 
15296  if (Subtarget->isThumb2())
15297  return isLegalT2ScaledAddressingMode(AM, VT);
15298 
15299  int Scale = AM.Scale;
15300  switch (VT.getSimpleVT().SimpleTy) {
15301  default: return false;
15302  case MVT::i1:
15303  case MVT::i8:
15304  case MVT::i32:
15305  if (Scale < 0) Scale = -Scale;
15306  if (Scale == 1)
15307  return true;
15308  // r + r << imm
15309  return isPowerOf2_32(Scale & ~1);
15310  case MVT::i16:
15311  case MVT::i64:
15312  // r +/- r
15313  if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
15314  return true;
15315  // r * 2 (this can be lowered to r + r).
15316  if (!AM.HasBaseReg && Scale == 2)
15317  return true;
15318  return false;
15319 
15320  case MVT::isVoid:
15321  // Note, we allow "void" uses (basically, uses that aren't loads or
15322  // stores), because arm allows folding a scale into many arithmetic
15323  // operations. This should be made more precise and revisited later.
15324 
15325  // Allow r << imm, but the imm has to be a multiple of two.
15326  if (Scale & 1) return false;
15327  return isPowerOf2_32(Scale);
15328  }
15329  }
15330  return true;
15331 }
15332 
15333 /// isLegalICmpImmediate - Return true if the specified immediate is legal
15334 /// icmp immediate, that is the target has icmp instructions which can compare
15335 /// a register against the immediate without having to materialize the
15336 /// immediate into a register.
15337 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
15338  // Thumb2 and ARM modes can use cmn for negative immediates.
15339  if (!Subtarget->isThumb())
15340  return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
15341  ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
15342  if (Subtarget->isThumb2())
15343  return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
15344  ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
15345  // Thumb1 doesn't have cmn, and only supports 8-bit immediates.
15346  return Imm >= 0 && Imm <= 255;
15347 }
15348 
15349 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
15350 /// *or sub* immediate, that is the target has add or sub instructions which can
15351 /// add a register with the immediate without having to materialize the
15352 /// immediate into a register.
15353 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
15354  // Same encoding for add/sub, just flip the sign.
15355  int64_t AbsImm = std::abs(Imm);
15356  if (!Subtarget->isThumb())
15357  return ARM_AM::getSOImmVal(AbsImm) != -1;
15358  if (Subtarget->isThumb2())
15359  return ARM_AM::getT2SOImmVal(AbsImm) != -1;
15360  // Thumb1 only has 8-bit unsigned immediate.
15361  return AbsImm >= 0 && AbsImm <= 255;
15362 }
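 // For example, Imm == -100 is legal on all sub-targets: ARM and Thumb2 can
 // select "sub r0, r0, #100" instead of an add, and 100 fits the 8-bit
 // unsigned immediate field of the Thumb1 add/sub instructions.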
15363 
15364 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
15365  bool isSEXTLoad, SDValue &Base,
15366  SDValue &Offset, bool &isInc,
15367  SelectionDAG &DAG) {
15368  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
15369  return false;
15370 
15371  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
15372  // AddressingMode 3
15373  Base = Ptr->getOperand(0);
15374  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
15375  int RHSC = (int)RHS->getZExtValue();
15376  if (RHSC < 0 && RHSC > -256) {
15377  assert(Ptr->getOpcode() == ISD::ADD);
15378  isInc = false;
15379  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
15380  return true;
15381  }
15382  }
15383  isInc = (Ptr->getOpcode() == ISD::ADD);
15384  Offset = Ptr->getOperand(1);
15385  return true;
15386  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
15387  // AddressingMode 2
15388  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
15389  int RHSC = (int)RHS->getZExtValue();
15390  if (RHSC < 0 && RHSC > -0x1000) {
15391  assert(Ptr->getOpcode() == ISD::ADD);
15392  isInc = false;
15393  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
15394  Base = Ptr->getOperand(0);
15395  return true;
15396  }
15397  }
15398 
15399  if (Ptr->getOpcode() == ISD::ADD) {
15400  isInc = true;
15401  ARM_AM::ShiftOpc ShOpcVal =
15402  ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
15403  if (ShOpcVal != ARM_AM::no_shift) {
15404  Base = Ptr->getOperand(1);
15405  Offset = Ptr->getOperand(0);
15406  } else {
15407  Base = Ptr->getOperand(0);
15408  Offset = Ptr->getOperand(1);
15409  }
15410  return true;
15411  }
15412 
15413  isInc = (Ptr->getOpcode() == ISD::ADD);
15414  Base = Ptr->getOperand(0);
15415  Offset = Ptr->getOperand(1);
15416  return true;
15417  }
15418 
15419  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
15420  return false;
15421 }
15422 
15423 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
15424  bool isSEXTLoad, SDValue &Base,
15425  SDValue &Offset, bool &isInc,
15426  SelectionDAG &DAG) {
15427  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
15428  return false;
15429 
15430  Base = Ptr->getOperand(0);
15431  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
15432  int RHSC = (int)RHS->getZExtValue();
15433  if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
15434  assert(Ptr->getOpcode() == ISD::ADD);
15435  isInc = false;
15436  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
15437  return true;
15438  } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
15439  isInc = Ptr->getOpcode() == ISD::ADD;
15440  Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
15441  return true;
15442  }
15443  }
15444 
15445  return false;
15446 }
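 // The +/-0x100 window above corresponds to the 8-bit immediate of the
 // Thumb2 writeback forms, e.g. "ldr r0, [r1], #4" (post-indexed) or
 // "ldr r0, [r1, #-8]!" (pre-indexed); zero is excluded because a writeback
 // of 0 is pointless.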
15447 
15448 static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
15449  bool isSEXTLoad, bool IsMasked, bool isLE,
15450  SDValue &Base, SDValue &Offset,
15451  bool &isInc, SelectionDAG &DAG) {
15452  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
15453  return false;
15454  if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
15455  return false;
15456 
15457  // We allow LE non-masked loads to change the type (for example use a vldrb.8
15458  // as opposed to a vldrw.32). This can allow extra addressing modes or
15459  // alignments for what is otherwise an equivalent instruction.
15460  bool CanChangeType = isLE && !IsMasked;
15461 
15462  ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
15463  int RHSC = (int)RHS->getZExtValue();
15464 
15465  auto IsInRange = [&](int RHSC, int Limit, int Scale) {
15466  if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
15467  assert(Ptr->getOpcode() == ISD::ADD);
15468  isInc = false;
15469  Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
15470  return true;
15471  } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
15472  isInc = Ptr->getOpcode() == ISD::ADD;
15473  Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
15474  return true;
15475  }
15476  return false;
15477  };
15478 
15479  // Try to find a matching instruction based on s/zext, Alignment, Offset and
15480  // (in BE/masked) type.
15481  Base = Ptr->getOperand(0);
15482  if (VT == MVT::v4i16) {
15483  if (Align >= 2 && IsInRange(RHSC, 0x80, 2))
15484  return true;
15485  } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
15486  if (IsInRange(RHSC, 0x80, 1))
15487  return true;
15488  } else if (Align >= 4 &&
15489  (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
15490  IsInRange(RHSC, 0x80, 4))
15491  return true;
15492  else if (Align >= 2 &&
15493  (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
15494  IsInRange(RHSC, 0x80, 2))
15495  return true;
15496  else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
15497  return true;
15498  return false;
15499 }
15500 
15501 /// getPreIndexedAddressParts - returns true by value, base pointer and
15502 /// offset pointer and addressing mode by reference if the node's address
15503 /// can be legally represented as pre-indexed load / store address.
15504 bool
15505 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
15506  SDValue &Offset,
15507  ISD::MemIndexedMode &AM,
15508  SelectionDAG &DAG) const {
15509  if (Subtarget->isThumb1Only())
15510  return false;
15511 
15512  EVT VT;
15513  SDValue Ptr;
15514  unsigned Align;
15515  bool isSEXTLoad = false;
15516  bool IsMasked = false;
15517  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15518  Ptr = LD->getBasePtr();
15519  VT = LD->getMemoryVT();
15520  Align = LD->getAlignment();
15521  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
15522  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15523  Ptr = ST->getBasePtr();
15524  VT = ST->getMemoryVT();
15525  Align = ST->getAlignment();
15526  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15527  Ptr = LD->getBasePtr();
15528  VT = LD->getMemoryVT();
15529  Align = LD->getAlignment();
15530  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
15531  IsMasked = true;
15532  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15533  Ptr = ST->getBasePtr();
15534  VT = ST->getMemoryVT();
15535  Align = ST->getAlignment();
15536  IsMasked = true;
15537  } else
15538  return false;
15539 
15540  bool isInc;
15541  bool isLegal = false;
15542  if (VT.isVector())
15543  isLegal = Subtarget->hasMVEIntegerOps() &&
15544  getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad,
15545  IsMasked, Subtarget->isLittle(), Base,
15546  Offset, isInc, DAG);
15547  else {
15548  if (Subtarget->isThumb2())
15549  isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
15550  Offset, isInc, DAG);
15551  else
15552  isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
15553  Offset, isInc, DAG);
15554  }
15555  if (!isLegal)
15556  return false;
15557 
15558  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
15559  return true;
15560 }
15561 
15562 /// getPostIndexedAddressParts - returns true by value, base pointer and
15563 /// offset pointer and addressing mode by reference if this node can be
15564 /// combined with a load / store to form a post-indexed load / store.
15565 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
15566  SDValue &Base,
15567  SDValue &Offset,
15568  ISD::MemIndexedMode &AM,
15569  SelectionDAG &DAG) const {
15570  EVT VT;
15571  SDValue Ptr;
15572  unsigned Align;
15573  bool isSEXTLoad = false, isNonExt;
15574  bool IsMasked = false;
15575  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15576  VT = LD->getMemoryVT();
15577  Ptr = LD->getBasePtr();
15578  Align = LD->getAlignment();
15579  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
15580  isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
15581  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15582  VT = ST->getMemoryVT();
15583  Ptr = ST->getBasePtr();
15584  Align = ST->getAlignment();
15585  isNonExt = !ST->isTruncatingStore();
15586  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15587  VT = LD->getMemoryVT();
15588  Ptr = LD->getBasePtr();
15589  Align = LD->getAlignment();
15590  isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
15591  isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
15592  IsMasked = true;
15593  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15594  VT = ST->getMemoryVT();
15595  Ptr = ST->getBasePtr();
15596  Align = ST->getAlignment();
15597  isNonExt = !ST->isTruncatingStore();
15598  IsMasked = true;
15599  } else
15600  return false;
15601 
15602  if (Subtarget->isThumb1Only()) {
15603  // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
15604  // must be non-extending/truncating, i32, with an offset of 4.
15605  assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
15606  if (Op->getOpcode() != ISD::ADD || !isNonExt)
15607  return false;
15608  auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
15609  if (!RHS || RHS->getZExtValue() != 4)
15610  return false;
15611 
15612  Offset = Op->getOperand(1);
15613  Base = Op->getOperand(0);
15614  AM = ISD::POST_INC;
15615  return true;
15616  }
15617 
15618  bool isInc;
15619  bool isLegal = false;
15620  if (VT.isVector())
15621  isLegal = Subtarget->hasMVEIntegerOps() &&
15622  getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, IsMasked,
15623  Subtarget->isLittle(), Base, Offset,
15624  isInc, DAG);
15625  else {
15626  if (Subtarget->isThumb2())
15627  isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
15628  isInc, DAG);
15629  else
15630  isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
15631  isInc, DAG);
15632  }
15633  if (!isLegal)
15634  return false;
15635 
15636  if (Ptr != Base) {
15637  // Swap base ptr and offset to catch more post-index load / store when
15638  // it's legal. In Thumb2 mode, offset must be an immediate.
15639  if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
15640  !Subtarget->isThumb2())
15641  std::swap(Base, Offset);
15642 
15643  // Post-indexed load / store update the base pointer.
15644  if (Ptr != Base)
15645  return false;
15646  }
15647 
15648  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
15649  return true;
15650 }
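 // Both hooks above are driven by DAGCombiner (CombineToPreIndexedLoadStore
 // and CombineToPostIndexedLoadStore); returning true lets the combiner fold
 // the pointer ADD/SUB into the memory operation as a writeback, e.g. turning
 // "ldr r0, [r1]; add r1, r1, #4" into "ldr r0, [r1], #4".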
15651 
15652 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15653  KnownBits &Known,
15654  const APInt &DemandedElts,
15655  const SelectionDAG &DAG,
15656  unsigned Depth) const {
15657  unsigned BitWidth = Known.getBitWidth();
15658  Known.resetAll();
15659  switch (Op.getOpcode()) {
15660  default: break;
15661  case ARMISD::ADDC:
15662  case ARMISD::ADDE:
15663  case ARMISD::SUBC:
15664  case ARMISD::SUBE:
15665  // Special cases when we convert a carry to a boolean.
15666  if (Op.getResNo() == 0) {
15667  SDValue LHS = Op.getOperand(0);
15668  SDValue RHS = Op.getOperand(1);
15669  // (ADDE 0, 0, C) will give us a single bit.
15670  if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
15671  isNullConstant(RHS)) {
15672  Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
15673  return;
15674  }
15675  }
15676  break;
15677  case ARMISD::CMOV: {
15678  // Bits are known zero/one if known on the LHS and RHS.
15679  Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
15680  if (Known.isUnknown())
15681  return;
15682 
15683  KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
15684  Known.Zero &= KnownRHS.Zero;
15685  Known.One &= KnownRHS.One;
15686  return;
15687  }
15688  case ISD::INTRINSIC_W_CHAIN: {
15689  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
15690  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
15691  switch (IntID) {
15692  default: return;
15693  case Intrinsic::arm_ldaex:
15694  case Intrinsic::arm_ldrex: {
15695  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
15696  unsigned MemBits = VT.getScalarSizeInBits();
15697  Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
15698  return;
15699  }
15700  }
15701  }
15702  case ARMISD::BFI: {
15703  // Conservatively, we can recurse down the first operand
15704  // and just mask out all affected bits.
15705  Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
15706 
15707  // The operand to BFI is already a mask suitable for removing the bits it
15708  // sets.
15709  ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
15710  const APInt &Mask = CI->getAPIntValue();
15711  Known.Zero &= Mask;
15712  Known.One &= Mask;
15713  return;
15714  }
15715  case ARMISD::VGETLANEs:
15716  case ARMISD::VGETLANEu: {
15717  const SDValue &SrcSV = Op.getOperand(0);
15718  EVT VecVT = SrcSV.getValueType();
15719  assert(VecVT.isVector() && "VGETLANE expected a vector type");
15720  const unsigned NumSrcElts = VecVT.getVectorNumElements();
15721  ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
15722  assert(Pos->getAPIntValue().ult(NumSrcElts) &&
15723  "VGETLANE index out of bounds");
15724  unsigned Idx = Pos->getZExtValue();
15725  APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
15726  Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
15727 
15728  EVT VT = Op.getValueType();
15729  const unsigned DstSz = VT.getScalarSizeInBits();
15730  const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
15731  (void)SrcSz;
15732  assert(SrcSz == Known.getBitWidth());
15733  assert(DstSz > SrcSz);
15734  if (Op.getOpcode() == ARMISD::VGETLANEs)
15735  Known = Known.sext(DstSz);
15736  else {
15737  Known = Known.zext(DstSz, true /* extended bits are known zero */);
15738  }
15739  assert(DstSz == Known.getBitWidth());
15740  break;
15741  }
15742  }
15743 }
15744 
15745 bool
15746 ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
15747  const APInt &DemandedAPInt,
15748  TargetLoweringOpt &TLO) const {
15749  // Delay optimization, so we don't have to deal with illegal types, or block
15750  // optimizations.
15751  if (!TLO.LegalOps)
15752  return false;
15753 
15754  // Only optimize AND for now.
15755  if (Op.getOpcode() != ISD::AND)
15756  return false;
15757 
15758  EVT VT = Op.getValueType();
15759 
15760  // Ignore vectors.
15761  if (VT.isVector())
15762  return false;
15763 
15764  assert(VT == MVT::i32 && "Unexpected integer type");
15765 
15766  // Make sure the RHS really is a constant.
15767  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
15768  if (!C)
15769  return false;
15770 
15771  unsigned Mask = C->getZExtValue();
15772 
15773  unsigned Demanded = DemandedAPInt.getZExtValue();
15774  unsigned ShrunkMask = Mask & Demanded;
15775  unsigned ExpandedMask = Mask | ~Demanded;
15776 
15777  // If the mask is all zeros, let the target-independent code replace the
15778  // result with zero.
15779  if (ShrunkMask == 0)
15780  return false;
15781 
15782  // If the mask is all ones, erase the AND. (Currently, the target-independent
15783  // code won't do this, so we have to do it explicitly to avoid an infinite
15784  // loop in obscure cases.)
15785  if (ExpandedMask == ~0U)
15786  return TLO.CombineTo(Op, Op.getOperand(0));
15787 
15788  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
15789  return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
15790  };
15791  auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
15792  if (NewMask == Mask)
15793  return true;
15794  SDLoc DL(Op);
15795  SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
15796  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
15797  return TLO.CombineTo(Op, NewOp);
15798  };
15799 
15800  // Prefer uxtb mask.
15801  if (IsLegalMask(0xFF))
15802  return UseMask(0xFF);
15803 
15804  // Prefer uxth mask.
15805  if (IsLegalMask(0xFFFF))
15806  return UseMask(0xFFFF);
15807 
15808  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
15809  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
15810  if (ShrunkMask < 256)
15811  return UseMask(ShrunkMask);
15812 
15813  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
15814  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
15815  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
15816  return UseMask(ExpandedMask);
15817 
15818  // Potential improvements:
15819  //
15820  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
15821  // We could try to prefer Thumb1 immediates which can be lowered to a
15822  // two-instruction sequence.
15823  // We could try to recognize more legal ARM/Thumb2 immediates here.
15824 
15825  return false;
15826 }
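 // Worked example: for (and X, 0x3FC) where only the low 8 bits are demanded,
 // ShrunkMask == 0xFC, which is below 256, so the constant is shrunk to
 // (and X, 0xFC) -- an immediate every sub-target can encode cheaply.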
15827 
15828 
15829 //===----------------------------------------------------------------------===//
15830 // ARM Inline Assembly Support
15831 //===----------------------------------------------------------------------===//
15832 
15833 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
15834  // Looking for "rev" which is V6+.
15835  if (!Subtarget->hasV6Ops())
15836  return false;
15837 
15838  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
15839  std::string AsmStr = IA->getAsmString();
15840  SmallVector<StringRef, 4> AsmPieces;
15841  SplitString(AsmStr, AsmPieces, ";\n");
15842 
15843  switch (AsmPieces.size()) {
15844  default: return false;
15845  case 1:
15846  AsmStr = AsmPieces[0];
15847  AsmPieces.clear();
15848  SplitString(AsmStr, AsmPieces, " \t,");
15849 
15850  // rev $0, $1
15851  if (AsmPieces.size() == 3 &&
15852  AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
15853  IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
15854  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
15855  if (Ty && Ty->getBitWidth() == 32)
15856  return IntrinsicLowering::LowerToByteSwap(CI);
15857  }
15858  break;
15859  }
15860 
15861  return false;
15862 }
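 // The net effect: IR such as
 //   %r = call i32 asm "rev $0, $1", "=l,l"(i32 %x)
 // is rewritten to the target-independent llvm.bswap.i32 intrinsic, which
 // the optimizer understands and which still selects back to a single REV.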
15863 
15864 const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
15865  // At this point, we have to lower this constraint to something else, so we
15866  // lower it to an "r" or "w". However, by doing this we will force the result
15867  // to be in register, while the X constraint is much more permissive.
15868  //
15869  // Although we are correct (we are free to emit anything, without
15870  // constraints), we might break use cases that would expect us to be more
15871  // efficient and emit something else.
15872  if (!Subtarget->hasVFP2Base())
15873  return "r";
15874  if (ConstraintVT.isFloatingPoint())
15875  return "w";
15876  if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
15877  (ConstraintVT.getSizeInBits() == 64 ||
15878  ConstraintVT.getSizeInBits() == 128))
15879  return "w";
15880 
15881  return "r";
15882 }
15883 
15884 /// getConstraintType - Given a constraint letter, return the type of
15885 /// constraint it is for this target.
15886 ARMTargetLowering::ConstraintType
15887 ARMTargetLowering::getConstraintType(StringRef Constraint) const {
15888  unsigned S = Constraint.size();
15889  if (S == 1) {
15890  switch (Constraint[0]) {
15891  default: break;
15892  case 'l': return C_RegisterClass;
15893  case 'w': return C_RegisterClass;
15894  case 'h': return C_RegisterClass;
15895  case 'x': return C_RegisterClass;
15896  case 't': return C_RegisterClass;
15897  case 'j': return C_Immediate; // Constant for movw.
15898  // An address with a single base register. Due to the way we
15899  // currently handle addresses it is the same as an 'r' memory constraint.
15900  case 'Q': return C_Memory;
15901  }
15902  } else if (S == 2) {
15903  switch (Constraint[0]) {
15904  default: break;
15905  case 'T': return C_RegisterClass;
15906  // All 'U+' constraints are addresses.
15907  case 'U': return C_Memory;
15908  }
15909  }
15910  return TargetLowering::getConstraintType(Constraint);
15911 }
15912 
15913 /// Examine constraint type and operand type and determine a weight value.
15914 /// This object must already have been set up with the operand type
15915 /// and the current alternative constraint selected.
15916 TargetLowering::ConstraintWeight
15917 ARMTargetLowering::getSingleConstraintMatchWeight(
15918  AsmOperandInfo &info, const char *constraint) const {
15919  ConstraintWeight weight = CW_Invalid;
15920  Value *CallOperandVal = info.CallOperandVal;
15921  // If we don't have a value, we can't do a match,
15922  // but allow it at the lowest weight.
15923  if (!CallOperandVal)
15924  return CW_Default;
15925  Type *type = CallOperandVal->getType();
15926  // Look at the constraint type.
15927  switch (*constraint) {
15928  default:
15929  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15930  break;
15931  case 'l':
15932  if (type->isIntegerTy()) {
15933  if (Subtarget->isThumb())
15934  weight = CW_SpecificReg;
15935  else
15936  weight = CW_Register;
15937  }
15938  break;
15939  case 'w':
15940  if (type->isFloatingPointTy())
15941  weight = CW_Register;
15942  break;
15943  }
15944  return weight;
15945 }
15946 
15947 using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
15948 
15949 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
15950  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
15951  switch (Constraint.size()) {
15952  case 1:
15953  // GCC ARM Constraint Letters
15954  switch (Constraint[0]) {
15955  case 'l': // Low regs or general regs.
15956  if (Subtarget->isThumb())
15957  return RCPair(0U, &ARM::tGPRRegClass);
15958  return RCPair(0U, &ARM::GPRRegClass);
15959  case 'h': // High regs or no regs.
15960  if (Subtarget->isThumb())
15961  return RCPair(0U, &ARM::hGPRRegClass);
15962  break;
15963  case 'r':
15964  if (Subtarget->isThumb1Only())
15965  return RCPair(0U, &ARM::tGPRRegClass);
15966  return RCPair(0U, &ARM::GPRRegClass);
15967  case 'w':
15968  if (VT == MVT::Other)
15969  break;
15970  if (VT == MVT::f32)
15971  return RCPair(0U, &ARM::SPRRegClass);
15972  if (VT.getSizeInBits() == 64)
15973  return RCPair(0U, &ARM::DPRRegClass);
15974  if (VT.getSizeInBits() == 128)
15975  return RCPair(0U, &ARM::QPRRegClass);
15976  break;
15977  case 'x':
15978  if (VT == MVT::Other)
15979  break;
15980  if (VT == MVT::f32)
15981  return RCPair(0U, &ARM::SPR_8RegClass);
15982  if (VT.getSizeInBits() == 64)
15983  return RCPair(0U, &ARM::DPR_8RegClass);
15984  if (VT.getSizeInBits() == 128)
15985  return RCPair(0U, &ARM::QPR_8RegClass);
15986  break;
15987  case 't':
15988  if (VT == MVT::Other)
15989  break;
15990  if (VT == MVT::f32 || VT == MVT::i32)
15991  return RCPair(0U, &ARM::SPRRegClass);
15992  if (VT.getSizeInBits() == 64)
15993  return RCPair(0U, &ARM::DPR_VFP2RegClass);
15994  if (VT.getSizeInBits() == 128)
15995  return RCPair(0U, &ARM::QPR_VFP2RegClass);
15996  break;
15997  }
15998  break;
15999 
16000  case 2:
16001  if (Constraint[0] == 'T') {
16002  switch (Constraint[1]) {
16003  default:
16004  break;
16005  case 'e':
16006  return RCPair(0U, &ARM::tGPREvenRegClass);
16007  case 'o':
16008  return RCPair(0U, &ARM::tGPROddRegClass);
16009  }
16010  }
16011  break;
16012 
16013  default:
16014  break;
16015  }
16016 
16017  if (StringRef("{cc}").equals_lower(Constraint))
16018  return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
16019 
16020  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16021 }
16022 
16023 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16024 /// vector. If it is invalid, don't add anything to Ops.
16025 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16026  std::string &Constraint,
16027  std::vector<SDValue>&Ops,
16028  SelectionDAG &DAG) const {
16029  SDValue Result;
16030 
16031  // Currently only support length 1 constraints.
16032  if (Constraint.length() != 1) return;
16033 
16034  char ConstraintLetter = Constraint[0];
16035  switch (ConstraintLetter) {
16036  default: break;
16037  case 'j':
16038  case 'I': case 'J': case 'K': case 'L':
16039  case 'M': case 'N': case 'O':
16040  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
16041  if (!C)
16042  return;
16043 
16044  int64_t CVal64 = C->getSExtValue();
16045  int CVal = (int) CVal64;
16046  // None of these constraints allow values larger than 32 bits. Check
16047  // that the value fits in an int.
16048  if (CVal != CVal64)
16049  return;
16050 
16051  switch (ConstraintLetter) {
16052  case 'j':
16053  // Constant suitable for movw, must be between 0 and
16054  // 65535.
16055  if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
16056  if (CVal >= 0 && CVal <= 65535)
16057  break;
16058  return;
16059  case 'I':
16060  if (Subtarget->isThumb1Only()) {
16061  // This must be a constant between 0 and 255, for ADD
16062  // immediates.
16063  if (CVal >= 0 && CVal <= 255)
16064  break;
16065  } else if (Subtarget->isThumb2()) {
16066  // A constant that can be used as an immediate value in a
16067  // data-processing instruction.
16068  if (ARM_AM::getT2SOImmVal(CVal) != -1)
16069  break;
16070  } else {
16071  // A constant that can be used as an immediate value in a
16072  // data-processing instruction.
16073  if (ARM_AM::getSOImmVal(CVal) != -1)
16074  break;
16075  }
16076  return;
16077 
16078  case 'J':
16079  if (Subtarget->isThumb1Only()) {
16080  // This must be a constant between -255 and -1, for negated ADD
16081  // immediates. This can be used in GCC with an "n" modifier that
16082  // prints the negated value, for use with SUB instructions. It is
16083  // not useful otherwise but is implemented for compatibility.
16084  if (CVal >= -255 && CVal <= -1)
16085  break;
16086  } else {
16087  // This must be a constant between -4095 and 4095. It is not clear
16088  // what this constraint is intended for. Implemented for
16089  // compatibility with GCC.
16090  if (CVal >= -4095 && CVal <= 4095)
16091  break;
16092  }
16093  return;
16094 
16095  case 'K':
16096  if (Subtarget->isThumb1Only()) {
16097  // A 32-bit value where only one byte has a nonzero value. Exclude
16098  // zero to match GCC. This constraint is used by GCC internally for
16099  // constants that can be loaded with a move/shift combination.
16100  // It is not useful otherwise but is implemented for compatibility.
16101  if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
16102  break;
16103  } else if (Subtarget->isThumb2()) {
16104  // A constant whose bitwise inverse can be used as an immediate
16105  // value in a data-processing instruction. This can be used in GCC
16106  // with a "B" modifier that prints the inverted value, for use with
16107  // BIC and MVN instructions. It is not useful otherwise but is
16108  // implemented for compatibility.
16109  if (ARM_AM::getT2SOImmVal(~CVal) != -1)
16110  break;
16111  } else {
16112  // A constant whose bitwise inverse can be used as an immediate
16113  // value in a data-processing instruction. This can be used in GCC
16114  // with a "B" modifier that prints the inverted value, for use with
16115  // BIC and MVN instructions. It is not useful otherwise but is
16116  // implemented for compatibility.
16117  if (ARM_AM::getSOImmVal(~CVal) != -1)
16118  break;
16119  }
16120  return;
16121 
16122  case 'L':
16123  if (Subtarget->isThumb1Only()) {
16124  // This must be a constant between -7 and 7,
16125  // for 3-operand ADD/SUB immediate instructions.
16126  if (CVal >= -7 && CVal < 7)
16127  break;
16128  } else if (Subtarget->isThumb2()) {
16129  // A constant whose negation can be used as an immediate value in a
16130  // data-processing instruction. This can be used in GCC with an "n"
16131  // modifier that prints the negated value, for use with SUB
16132  // instructions. It is not useful otherwise but is implemented for
16133  // compatibility.
16134  if (ARM_AM::getT2SOImmVal(-CVal) != -1)
16135  break;
16136  } else {
16137  // A constant whose negation can be used as an immediate value in a
16138  // data-processing instruction. This can be used in GCC with an "n"
16139  // modifier that prints the negated value, for use with SUB
16140  // instructions. It is not useful otherwise but is implemented for
16141  // compatibility.
16142  if (ARM_AM::getSOImmVal(-CVal) != -1)
16143  break;
16144  }
16145  return;
16146 
16147  case 'M':
16148  if (Subtarget->isThumb1Only()) {
16149  // This must be a multiple of 4 between 0 and 1020, for
16150  // ADD sp + immediate.
16151  if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
16152  break;
16153  } else {
16154  // A power of two or a constant between 0 and 32. This is used in
16155  // GCC for the shift amount on shifted register operands, but it is
16156  // useful in general for any shift amounts.
16157  if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
16158  break;
16159  }
16160  return;
16161 
16162  case 'N':
16163  if (Subtarget->isThumb1Only()) {
16164  // This must be a constant between 0 and 31, for shift amounts.
16165  if (CVal >= 0 && CVal <= 31)
16166  break;
16167  }
16168  return;
16169 
16170  case 'O':
16171  if (Subtarget->isThumb1Only()) {
16172  // This must be a multiple of 4 between -508 and 508, for
16173  // ADD/SUB sp = sp + immediate.
16174  if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
16175  break;
16176  }
16177  return;
16178  }
16179  Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
16180  break;
16181  }
16182 
16183  if (Result.getNode()) {
16184  Ops.push_back(Result);
16185  return;
16186  }
16187  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16188 }
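 // As a usage example, C code like
 //   asm("add %0, %0, %1" : "+r"(x) : "I"(42));
 // reaches this function with ConstraintLetter == 'I'; since 42 is a valid
 // modified immediate, it is pushed into Ops as a target constant instead of
 // being forced into a register.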
16189 
16190 static RTLIB::Libcall getDivRemLibcall(
16191  const SDNode *N, MVT::SimpleValueType SVT) {
16192  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
16193  N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
16194  "Unhandled Opcode in getDivRemLibcall");
16195  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
16196  N->getOpcode() == ISD::SREM;
16197  RTLIB::Libcall LC;
16198  switch (SVT) {
16199  default: llvm_unreachable("Unexpected request for libcall!");
16200  case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
16201  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
16202  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
16203  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
16204  }
16205  return LC;
16206 }
16207 
16208 static TargetLowering::ArgListTy getDivRemArgList(
16209  const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
16210  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
16211  N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
16212  "Unhandled Opcode in getDivRemArgList");
16213  bool isSigned = N->getOpcode() == ISD::SDIVREM ||
16214  N->getOpcode() == ISD::SREM;
16215  TargetLowering::ArgListTy Args;
16216  TargetLowering::ArgListEntry Entry;
16217  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16218  EVT ArgVT = N->getOperand(i).getValueType();
16219  Type *ArgTy = ArgVT.getTypeForEVT(*Context);
16220  Entry.Node = N->getOperand(i);
16221  Entry.Ty = ArgTy;
16222  Entry.IsSExt = isSigned;
16223  Entry.IsZExt = !isSigned;
16224  Args.push_back(Entry);
16225  }
16226  if (Subtarget->isTargetWindows() && Args.size() >= 2)
16227  std::swap(Args[0], Args[1]);
16228  return Args;
16229 }
16230 
16231 SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
16232  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
16233  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
16234  Subtarget->isTargetWindows()) &&
16235  "Register-based DivRem lowering only");
16236  unsigned Opcode = Op->getOpcode();
16237  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
16238  "Invalid opcode for Div/Rem lowering");
16239  bool isSigned = (Opcode == ISD::SDIVREM);
16240  EVT VT = Op->getValueType(0);
16241  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
16242  SDLoc dl(Op);
16243 
16244  // If the target has hardware divide, use divide + multiply + subtract:
16245  // div = a / b
16246  // rem = a - b * div
16247  // return {div, rem}
16248  // This should be lowered into UDIV/SDIV + MLS later on.
16249  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
16250  : Subtarget->hasDivideInARMMode();
16251  if (hasDivide && Op->getValueType(0).isSimple() &&
16252  Op->getSimpleValueType(0) == MVT::i32) {
16253  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
16254  const SDValue Dividend = Op->getOperand(0);
16255  const SDValue Divisor = Op->getOperand(1);
16256  SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
16257  SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
16258  SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
16259 
16260  SDValue Values[2] = {Div, Rem};
16261  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
16262  }
16263 
16264  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
16265  VT.getSimpleVT().SimpleTy);
16266  SDValue InChain = DAG.getEntryNode();
16267 
16268  TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
16269  DAG.getContext(),
16270  Subtarget);
16271 
16272  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
16273  getPointerTy(DAG.getDataLayout()));
16274 
16275  Type *RetTy = StructType::get(Ty, Ty);
16276 
16277  if (Subtarget->isTargetWindows())
16278  InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
16279 
16280  TargetLowering::CallLoweringInfo CLI(DAG);
16281  CLI.setDebugLoc(dl).setChain(InChain)
16282  .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
16283  .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
16284 
16285  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
16286  return CallInfo.first;
16287 }
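 // On AEABI targets the libcall chosen above is __aeabi_idivmod /
 // __aeabi_uidivmod (or the 64-bit __aeabi_ldivmod / __aeabi_uldivmod),
 // which return the quotient and remainder in register pairs; the
 // setInRegister() above models that struct-in-registers return convention.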
16288 
16289 // Lowers REM using divmod helpers
16290 // see RTABI section 4.2/4.3
16291 SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
16292  // Build return types (div and rem)
16293  std::vector<Type*> RetTyParams;
16294  Type *RetTyElement;
16295 
16296  switch (N->getValueType(0).getSimpleVT().SimpleTy) {
16297  default: llvm_unreachable("Unexpected request for libcall!");
16298  case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
16299  case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
16300  case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
16301  case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
16302  }
16303 
16304  RetTyParams.push_back(RetTyElement);
16305  RetTyParams.push_back(RetTyElement);
16306  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
16307  Type *RetTy = StructType::get(*DAG.getContext(), ret);
16308 
16309  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
16310  SimpleTy);
16311  SDValue InChain = DAG.getEntryNode();
16312  TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
16313  Subtarget);
16314  bool isSigned = N->getOpcode() == ISD::SREM;
16315  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
16316  getPointerTy(DAG.getDataLayout()));
16317 
16318  if (Subtarget->isTargetWindows())
16319  InChain = WinDBZCheckDenominator(DAG, N, InChain);
16320 
16321  // Lower call
16322  CallLoweringInfo CLI(DAG);
16323  CLI.setChain(InChain)
16324  .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
16325  .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
16326  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
16327 
16328  // Return second (rem) result operand (first contains div)
16329  SDNode *ResNode = CallResult.first.getNode();
16330  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
16331  return ResNode->getOperand(1);
16332 }
16333 
16334 SDValue
16335 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
16336  assert(Subtarget->isTargetWindows() && "unsupported target platform");
16337  SDLoc DL(Op);
16338 
16339  // Get the inputs.
16340  SDValue Chain = Op.getOperand(0);
16341  SDValue Size = Op.getOperand(1);
16342 
16343  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16344  "no-stack-arg-probe")) {
16345  unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
16346  SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
16347  Chain = SP.getValue(1);
16348  SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
16349  if (Align)
16350  SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
16351  DAG.getConstant(-(uint64_t)Align, DL, MVT::i32));
16352  Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
16353  SDValue Ops[2] = { SP, Chain };
16354  return DAG.getMergeValues(Ops, DL);
16355  }
16356 
16357  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
16358  DAG.getConstant(2, DL, MVT::i32));
16359 
16360  SDValue Flag;
16361  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
16362  Flag = Chain.getValue(1);
16363 
16364  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
16365  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
16366 
16367  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
16368  Chain = NewSP.getValue(1);
16369 
16370  SDValue Ops[2] = { NewSP, Chain };
16371  return DAG.getMergeValues(Ops, DL);
16372 }
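 // ARMISD::WIN__CHKSTK expands (in EmitInstrWithCustomInserter) to a call to
 // the Windows __chkstk helper, which takes the allocation size in 4-byte
 // words in r4 (hence the shift right by 2 above) and probes each page of
 // the new stack area; the adjusted sp is then read back via CopyFromReg.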
16373 
16374 SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
16375  bool IsStrict = Op->isStrictFPOpcode();
16376  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
16377  const unsigned DstSz = Op.getValueType().getSizeInBits();
16378  const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
16379  assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
16380  "Unexpected type for custom-lowering FP_EXTEND");
16381 
16382  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
16383  "With both FP DP and 16, any FP conversion is legal!");
16384 
16385  assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
16386  "With FP16, 16 to 32 conversion is legal!");
16387 
16388  // Converting from 32 -> 64 is valid if we have FP64.
16389  if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
16390  // FIXME: Remove this when we have strict fp instruction selection patterns
16391  if (IsStrict) {
16392  SDLoc Loc(Op);
16393  SDValue Result = DAG.getNode(ISD::FP_EXTEND,
16394  Loc, Op.getValueType(), SrcVal);
16395  return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
16396  }
16397  return Op;
16398  }
16399 
16400  // Either we are converting from 16 -> 64, without FP16 and/or
16401  // FP.double-precision or without Armv8-fp. So we must do it in two
16402  // steps.
16403  // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
16404  // without FP16. So we must do a function call.
16405  SDLoc Loc(Op);
16406  RTLIB::Libcall LC;
16407  MakeLibCallOptions CallOptions;
16408  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
16409  for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
16410  bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
16411  MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
16412  MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
16413  if (Supported) {
16414  if (IsStrict) {
16415  SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
16416  {DstVT, MVT::Other}, {Chain, SrcVal});
16417  Chain = SrcVal.getValue(1);
16418  } else {
16419  SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
16420  }
16421  } else {
16422  LC = RTLIB::getFPEXT(SrcVT, DstVT);
16423  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
16424  "Unexpected type for custom-lowering FP_EXTEND");
16425  std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
16426  Loc, Chain);
16427  }
16428  }
16429 
16430  return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
16431 }
16432 
16433 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
16434  bool IsStrict = Op->isStrictFPOpcode();
16435 
16436  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
16437  EVT SrcVT = SrcVal.getValueType();
16438  EVT DstVT = Op.getValueType();
16439  const unsigned DstSz = Op.getValueType().getSizeInBits();
16440  const unsigned SrcSz = SrcVT.getSizeInBits();
16441  (void)DstSz;
16442  assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
16443  "Unexpected type for custom-lowering FP_ROUND");
16444 
16445  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
16446  "With both FP DP and 16, any FP conversion is legal!");
16447 
16448  SDLoc Loc(Op);
16449 
16450  // Instruction from 32 -> 16 if hasFP16 is valid
16451  if (SrcSz == 32 && Subtarget->hasFP16())
16452  return Op;
16453 
16454  // Lib call from 32 -> 16 / 64 -> [32, 16]
16455  RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
16456  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
16457  "Unexpected type for custom-lowering FP_ROUND");
16458  MakeLibCallOptions CallOptions;
16459  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
16460  SDValue Result;
16461  std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
16462  Loc, Chain);
16463  return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
16464 }
16465 
16466 void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
16467  SelectionDAG &DAG) const {
16468  assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS.");
16469  MVT HalfT = MVT::i32;
16470  SDLoc dl(N);
16471  SDValue Hi, Lo, Tmp;
16472 
16473  if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) ||
16474  !isOperationLegalOrCustom(ISD::UADDO, HalfT))
16475  return;
16476 
16477  unsigned OpTypeBits = HalfT.getScalarSizeInBits();
16478  SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
16479 
16480  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
16481  DAG.getConstant(0, dl, HalfT));
16482  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
16483  DAG.getConstant(1, dl, HalfT));
16484 
16485  Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi,
16486  DAG.getConstant(OpTypeBits - 1, dl,
16487  getShiftAmountTy(HalfT, DAG.getDataLayout())));
16488  Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
16489  Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
16490  SDValue(Lo.getNode(), 1));
16491  Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
16492  Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
16493 
16494  Results.push_back(Lo);
16495  Results.push_back(Hi);
16496 }
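 // This is the classic branch-free abs: with s = x >> 63 (all copies of the
 // sign bit), abs(x) == (x + s) ^ s. The i64 value is processed as two i32
 // halves, with UADDO/ADDCARRY threading the carry from the low half into
 // the high half.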
16497 
16498 bool
16499 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
16500  // The ARM target isn't yet aware of offsets.
16501  return false;
16502 }
16503 
16504 bool ARM::isBitFieldInvertedMask(unsigned v) {
16505  if (v == 0xffffffff)
16506  return false;
16507 
16508  // there can be 1's on either or both "outsides", all the "inside"
16509  // bits must be 0's
16510  return isShiftedMask_32(~v);
16511 }
16512 
16513 /// isFPImmLegal - Returns true if the target can instruction select the
16514 /// specified FP immediate natively. If false, the legalizer will
16515 /// materialize the FP immediate as a load from a constant pool.
16516 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16517  bool ForCodeSize) const {
16518  if (!Subtarget->hasVFP3Base())
16519  return false;
16520  if (VT == MVT::f16 && Subtarget->hasFullFP16())
16521  return ARM_AM::getFP16Imm(Imm) != -1;
16522  if (VT == MVT::f32)
16523  return ARM_AM::getFP32Imm(Imm) != -1;
16524  if (VT == MVT::f64 && Subtarget->hasFP64())
16525  return ARM_AM::getFP64Imm(Imm) != -1;
16526  return false;
16527 }
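 // VMOV-immediate can represent values of the form +/- m/16 * 2^e with
 // 16 <= m <= 31 and -3 <= e <= 4, so constants such as 0.5, 1.0 or 31.0
 // fold into a single vmov, while e.g. 0.1 falls back to a constant-pool
 // load.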
16528 
16529 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
16530 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
16531 /// specified in the intrinsic calls.
16532 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
16533  const CallInst &I,
16534  MachineFunction &MF,
16535  unsigned Intrinsic) const {
16536  switch (Intrinsic) {
16537  case Intrinsic::arm_neon_vld1:
16538  case Intrinsic::arm_neon_vld2:
16539  case Intrinsic::arm_neon_vld3:
16540  case Intrinsic::arm_neon_vld4:
16541  case Intrinsic::arm_neon_vld2lane:
16542  case Intrinsic::arm_neon_vld3lane:
16543  case Intrinsic::arm_neon_vld4lane:
16544  case Intrinsic::arm_neon_vld2dup:
16545  case Intrinsic::arm_neon_vld3dup:
16546  case Intrinsic::arm_neon_vld4dup: {
16547  Info.opc = ISD::INTRINSIC_W_CHAIN;
16548  // Conservatively set memVT to the entire set of vectors loaded.
16549  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
16550  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
16551  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
16552  Info.ptrVal = I.getArgOperand(0);
16553  Info.offset = 0;
16554  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
16555  Info.align = MaybeAlign(cast<ConstantInt>(AlignArg)->getZExtValue());
16556  // volatile loads with NEON intrinsics not supported
16557  Info.flags = MachineMemOperand::MOLoad;
16558  return true;
16559  }
16560  case Intrinsic::arm_neon_vld1x2:
16561  case Intrinsic::arm_neon_vld1x3:
16562  case Intrinsic::arm_neon_vld1x4: {
16563  Info.opc = ISD::INTRINSIC_W_CHAIN;
16564  // Conservatively set memVT to the entire set of vectors loaded.
16565  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
16566  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
16567  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
16568  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
16569  Info.offset = 0;
16570  Info.align.reset();
16571  // volatile loads with NEON intrinsics not supported
16572  Info.flags = MachineMemOperand::MOLoad;
16573  return true;
16574  }
16575  case Intrinsic::arm_neon_vst1:
16576  case Intrinsic::arm_neon_vst2:
16577  case Intrinsic::arm_neon_vst3:
16578  case Intrinsic::arm_neon_vst4:
16579  case Intrinsic::arm_neon_vst2lane:
16580  case Intrinsic::arm_neon_vst3lane:
16581  case Intrinsic::arm_neon_vst4lane: {
16582  Info.opc = ISD::INTRINSIC_VOID;
16583  // Conservatively set memVT to the entire set of vectors stored.
16584  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
16585  unsigned NumElts = 0;
16586  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
16587  Type *ArgTy = I.getArgOperand(ArgI)->getType();
16588  if (!ArgTy->isVectorTy())
16589  break;
16590  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
16591  }
16592  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
16593  Info.ptrVal = I.getArgOperand(0);
16594  Info.offset = 0;
16595  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
16596  Info.align = MaybeAlign(cast<ConstantInt>(AlignArg)->getZExtValue());
16597  // volatile stores with NEON intrinsics not supported
16598  Info.flags = MachineMemOperand::MOStore;
16599  return true;
16600  }
16601  case Intrinsic::arm_neon_vst1x2:
16602  case Intrinsic::arm_neon_vst1x3:
16603  case Intrinsic::arm_neon_vst1x4: {
16604  Info.opc = ISD::INTRINSIC_VOID;
16605  // Conservatively set memVT to the entire set of vectors stored.
16606  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
16607  unsigned NumElts = 0;
16608  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
16609  Type *ArgTy = I.getArgOperand(ArgI)->getType();
16610  if (!ArgTy->isVectorTy())
16611  break;
16612  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
16613  }
16614  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
16615  Info.ptrVal = I.getArgOperand(0);
16616  Info.offset = 0;
16617  Info.align.reset();
16618  // volatile stores with NEON intrinsics not supported
16619  Info.flags = MachineMemOperand::MOStore;
16620  return true;
16621  }
16622  case Intrinsic::arm_ldaex:
16623  case Intrinsic::arm_ldrex: {
16624  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
16625  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
16626  Info.opc = ISD::INTRINSIC_W_CHAIN;
16627  Info.memVT = MVT::getVT(PtrTy->getElementType());
16628  Info.ptrVal = I.getArgOperand(0);
16629  Info.offset = 0;
16630  Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
16631  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
16632  return true;
16633  }
16634  case Intrinsic::arm_stlex:
16635  case Intrinsic::arm_strex: {
16636  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
16637  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
16638  Info.opc = ISD::INTRINSIC_W_CHAIN;
16639  Info.memVT = MVT::getVT(PtrTy->getElementType());
16640  Info.ptrVal = I.getArgOperand(1);
16641  Info.offset = 0;
16642  Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
16643  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
16644  return true;
16645  }
16646  case Intrinsic::arm_stlexd:
16647  case Intrinsic::arm_strexd:
16648  Info.opc = ISD::INTRINSIC_W_CHAIN;
16649  Info.memVT = MVT::i64;
16650  Info.ptrVal = I.getArgOperand(2);
16651  Info.offset = 0;
16652  Info.align = Align(8);
16653  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
16654  return true;
16655 
16656  case Intrinsic::arm_ldaexd:
16657  case Intrinsic::arm_ldrexd:
16658  Info.opc = ISD::INTRINSIC_W_CHAIN;
16659  Info.memVT = MVT::i64;
16660  Info.ptrVal = I.getArgOperand(0);
16661  Info.offset = 0;
16662  Info.align = Align(8);
16663  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
16664  return true;
16665 
16666  default:
16667  break;
16668  }
16669 
16670  return false;
16671 }
16672 
16673 /// Returns true if it is beneficial to convert a load of a constant
16674 /// to just the constant itself.
16675 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
16676  Type *Ty) const {
16677  assert(Ty->isIntegerTy());
16678 
16679  unsigned Bits = Ty->getPrimitiveSizeInBits();
16680  if (Bits == 0 || Bits > 32)
16681  return false;
16682  return true;
16683 }
16684 
16685 bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
16686  unsigned Index) const {
16687  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
16688  return false;
16689 
16690  return (Index == 0 || Index == ResVT.getVectorNumElements());
16691 }
16692 
16693 Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
16694  ARM_MB::MemBOpt Domain) const {
16695  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
16696 
16697  // First, if the target has no DMB, see what fallback we can use.
16698  if (!Subtarget->hasDataBarrier()) {
16699  // Some ARMv6 cpus can support data barriers with an mcr instruction.
16700  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
16701  // here.
16702  if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
16703  Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
16704  Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
16705  Builder.getInt32(0), Builder.getInt32(7),
16706  Builder.getInt32(10), Builder.getInt32(5)};
16707  return Builder.CreateCall(MCR, args);
16708  } else {
16709  // Instead of using barriers, atomic accesses on these subtargets use
16710  // libcalls.
16711  llvm_unreachable("makeDMB on a target so old that it has no barriers");
16712  }
16713  } else {
16714  Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
16715  // Only a full system barrier exists in the M-class architectures.
16716  Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
16717  Constant *CDomain = Builder.getInt32(Domain);
16718  return Builder.CreateCall(DMB, CDomain);
16719  }
16720 }
16721 
16722 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
16723 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
16724  Instruction *Inst,
16725  AtomicOrdering Ord) const {
16726  switch (Ord) {
16727  case AtomicOrdering::NotAtomic:
16728  case AtomicOrdering::Unordered:
16729  llvm_unreachable("Invalid fence: unordered/non-atomic");
16730  case AtomicOrdering::Monotonic:
16731  case AtomicOrdering::Acquire:
16732  return nullptr; // Nothing to do
16733  case AtomicOrdering::SequentiallyConsistent:
16734  if (!Inst->hasAtomicStore())
16735  return nullptr; // Nothing to do
16736  LLVM_FALLTHROUGH;
16737  case AtomicOrdering::Release:
16738  case AtomicOrdering::AcquireRelease:
16739  if (Subtarget->preferISHSTBarriers())
16740  return makeDMB(Builder, ARM_MB::ISHST);
16741  // FIXME: add a comment with a link to documentation justifying this.
16742  else
16743  return makeDMB(Builder, ARM_MB::ISH);
16744  }
16745  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
16746 }
16747 
16748 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
16749  Instruction *Inst,
16750  AtomicOrdering Ord) const {
16751  switch (Ord) {
16752  case AtomicOrdering::NotAtomic:
16753  case AtomicOrdering::Unordered:
16754  llvm_unreachable("Invalid fence: unordered/not-atomic");
16755  case AtomicOrdering::Monotonic:
16756  case AtomicOrdering::Release:
16757  return nullptr; // Nothing to do
16758  case AtomicOrdering::Acquire:
16759  case AtomicOrdering::AcquireRelease:
16760  case AtomicOrdering::SequentiallyConsistent:
16761  return makeDMB(Builder, ARM_MB::ISH);
16762  }
16763  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
16764 }
16765 
16766 // Loads and stores less than 64-bits are already atomic; ones above that
16767 // are doomed anyway, so defer to the default libcall and blame the OS when
16768 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
16769 // anything for those.
16770 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
16771  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
16772  return (Size == 64) && !Subtarget->isMClass();
16773 }
16774 
16775 // Loads and stores less than 64-bits are already atomic; ones above that
16776 // are doomed anyway, so defer to the default libcall and blame the OS when
16777 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
16778 // anything for those.
16779 // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
16780 // guarantee, see DDI0406C ARM architecture reference manual,
16781 // sections A8.8.72-74 LDRD)
16782 TargetLowering::AtomicExpansionKind
16783 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
16784  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
16785  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
16786  : AtomicExpansionKind::None;
16787 }
16788 
16789 // For the real atomic operations, we have ldrex/strex up to 32 bits,
16790 // and up to 64 bits on the non-M profiles
16791 TargetLowering::AtomicExpansionKind
16792 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
16793  if (AI->isFloatingPointOperation())
16794  return AtomicExpansionKind::CmpXChg;
16795 
16796  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
16797  bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
16798  return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
16799  ? AtomicExpansionKind::LLSC
16800  : AtomicExpansionKind::None;
16801 }
16802 
16803 TargetLowering::AtomicExpansionKind
16804 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
16805  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
16806  // implement cmpxchg without spilling. If the address being exchanged is also
16807  // on the stack and close enough to the spill slot, this can lead to a
16808  // situation where the monitor always gets cleared and the atomic operation
16809  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
16810  bool HasAtomicCmpXchg =
16811  !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
16812  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
16813  return AtomicExpansionKind::LLSC;
16814  return AtomicExpansionKind::None;
16815 }
16816 
16817 bool ARMTargetLowering::shouldInsertFencesForAtomic(
16818  const Instruction *I) const {
16819  return InsertFencesForAtomic;
16820 }
16821 
16822 // This has so far only been implemented for MachO.
16823 bool ARMTargetLowering::useLoadStackGuardNode() const {
16824  return Subtarget->isTargetMachO();
16825 }
16826 
16827 void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
16828  if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
16829  return TargetLowering::insertSSPDeclarations(M);
16830 
16831  // MSVC CRT has a global variable holding security cookie.
16832  M.getOrInsertGlobal("__security_cookie",
16833  Type::getInt8PtrTy(M.getContext()));
16834 
16835  // MSVC CRT has a function to validate security cookie.
16836  FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
16837  "__security_check_cookie", Type::getVoidTy(M.getContext()),
16838  Type::getInt8PtrTy(M.getContext()));
16839  if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
16840  F->addAttribute(1, Attribute::AttrKind::InReg);
16841 }
16842 
16843 Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
16844  // MSVC CRT has a global variable holding security cookie.
16845  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
16846  return M.getGlobalVariable("__security_cookie");
16847  return TargetLowering::getSDagStackGuard(M);
16848 }
16849 
16850 Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
16851  // MSVC CRT has a function to validate security cookie.
16852  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
16853  return M.getFunction("__security_check_cookie");
16854  return TargetLowering::getSSPStackGuardCheck(M);
16855 }
16856 
16857 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
16858  unsigned &Cost) const {
16859  // If we do not have NEON, vector types are not natively supported.
16860  if (!Subtarget->hasNEON())
16861  return false;
16862 
16863  // Floating point values and vector values map to the same register file.
16864  // Therefore, although we could do a store extract of a vector type, this is
16865  // better to leave at float as we have more freedom in the addressing mode for
16866  // those.
16867  if (VectorTy->isFPOrFPVectorTy())
16868  return false;
16869 
16870  // If the index is unknown at compile time, this is very expensive to lower
16871  // and it is not possible to combine the store with the extract.
16872  if (!isa<ConstantInt>(Idx))
16873  return false;
16874 
16875  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
16876  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
16877  // We can do a store + vector extract on any vector that fits perfectly in a D
16878  // or Q register.
16879  if (BitWidth == 64 || BitWidth == 128) {
16880  Cost = 0;
16881  return true;
16882  }
16883  return false;
16884 }
16885 
16886 bool ARMTargetLowering::isCheapToSpeculateCttz() const {
16887  return Subtarget->hasV6T2Ops();
16888 }
16889 
16890 bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
16891  return Subtarget->hasV6T2Ops();
16892 }
16893 
16894 bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
16895  return !Subtarget->hasMinSize() || Subtarget->isTargetWindows();
16896 }
16897 
16898 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
16899  AtomicOrdering Ord) const {
16900  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
16901  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
16902  bool IsAcquire = isAcquireOrStronger(Ord);
16903 
16904  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
16905  // intrinsic must return {i32, i32} and we have to recombine them into a
16906  // single i64 here.
16907  if (ValTy->getPrimitiveSizeInBits() == 64) {
16908  Intrinsic::ID Int =
16909  IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
16910  Function *Ldrex = Intrinsic::getDeclaration(M, Int);
16911 
16912  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
16913  Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
16914 
16915  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
16916  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
16917  if (!Subtarget->isLittle())
16918  std::swap (Lo, Hi);
16919  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
16920  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
16921  return Builder.CreateOr(
16922  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
16923  }
16924 
16925  Type *Tys[] = { Addr->getType() };
16926  Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
16927  Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
16928 
16929  return Builder.CreateTruncOrBitCast(
16930  Builder.CreateCall(Ldrex, Addr),
16931  cast<PointerType>(Addr->getType())->getElementType());
16932 }
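// For illustration, the i64 acquire path above produces IR along these lines
// (the %addr name is hypothetical):
//   %lohi = call { i32, i32 } @llvm.arm.ldaexd(i8* %addr)
//   %lo = extractvalue { i32, i32 } %lohi, 0
//   %hi = extractvalue { i32, i32 } %lohi, 1
// followed by zexts of both halves to i64, a shift of %hi left by 32, and an
// OR to rebuild the 64-bit value.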
16933 
16934 void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
16935  IRBuilder<> &Builder) const {
16936  if (!Subtarget->hasV7Ops())
16937  return;
16938  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
16939  Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
16940 }
16941 
16942 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
16943  Value *Addr,
16944  AtomicOrdering Ord) const {
16945  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
16946  bool IsRelease = isReleaseOrStronger(Ord);
16947 
16948  // Since the intrinsics must have legal type, the i64 intrinsics take two
16949  // parameters: "i32, i32". We must marshal Val into the appropriate form
16950  // before the call.
16951  if (Val->getType()->getPrimitiveSizeInBits() == 64) {
16952  Intrinsic::ID Int =
16953  IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
16954  Function *Strex = Intrinsic::getDeclaration(M, Int);
16955  Type *Int32Ty = Type::getInt32Ty(M->getContext());
16956 
16957  Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
16958  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
16959  if (!Subtarget->isLittle())
16960  std::swap(Lo, Hi);
16961  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
16962  return Builder.CreateCall(Strex, {Lo, Hi, Addr});
16963  }
16964 
16965  Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
16966  Type *Tys[] = { Addr->getType() };
16967  Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
16968 
16969  return Builder.CreateCall(
16970  Strex, {Builder.CreateZExtOrBitCast(
16971  Val, Strex->getFunctionType()->getParamType(0)),
16972  Addr});
16973 }
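// For illustration, the i64 release path above emits IR along these lines
// (%val and %addr are hypothetical names):
//   %lo = trunc i64 %val to i32
//   %shr = lshr i64 %val, 32
//   %hi = trunc i64 %shr to i32
//   %status = call i32 @llvm.arm.stlexd(i32 %lo, i32 %hi, i8* %addr)
// where a zero %status indicates the store-conditional succeeded.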
16974 
16975 
16976 bool ARMTargetLowering::alignLoopsWithOptSize() const {
16977  return Subtarget->isMClass();
16978 }
16979 
16980 /// A helper function for determining the number of interleaved accesses we
16981 /// will generate when lowering accesses of the given type.
16982 unsigned
16983 ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
16984  const DataLayout &DL) const {
16985  return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
16986 }
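// E.g. a <16 x i32> vector is 512 bits wide, so this returns
// (512 + 127) / 128 = 4 accesses, while a 64-bit <2 x i32> rounds up to 1.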
16987 
16988 bool ARMTargetLowering::isLegalInterleavedAccessType(
16989  unsigned Factor, VectorType *VecTy, const DataLayout &DL) const {
16990 
16991  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
16992  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
16993 
16994  if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
16995  return false;
16996 
16997  // Ensure the vector doesn't have f16 elements. Even though we could do an
16998  // i16 vldN, we can't hold the f16 vectors and will end up converting via
16999  // f32.
17000  if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
17001  return false;
17002  if (Subtarget->hasMVEIntegerOps() && Factor == 3)
17003  return false;
17004 
17005  // Ensure the number of vector elements is greater than 1.
17006  if (VecTy->getNumElements() < 2)
17007  return false;
17008 
17009  // Ensure the element type is legal.
17010  if (ElSize != 8 && ElSize != 16 && ElSize != 32)
17011  return false;
17012 
17013  // Ensure the total vector size is 64 or a multiple of 128. Types larger than
17014  // 128 will be split into multiple interleaved accesses.
17015  if (Subtarget->hasNEON() && VecSize == 64)
17016  return true;
17017  return VecSize % 128 == 0;
17018 }
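// E.g. with NEON, a factor-2 access on <4 x i32> (128 bits, 32-bit elements)
// or <8 x i8> (64 bits) is legal here, while <2 x i64> is rejected because
// 64-bit elements are not supported by the vldN/vstN forms.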
17019 
17020 unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
17021  if (Subtarget->hasNEON())
17022  return 4;
17023  if (Subtarget->hasMVEIntegerOps())
17024  return MVEMaxSupportedInterleaveFactor;
17025  return TargetLoweringBase::getMaxSupportedInterleaveFactor();
17026 }
17027 
17028 /// Lower an interleaved load into a vldN intrinsic.
17029 ///
17030 /// E.g. Lower an interleaved load (Factor = 2):
17031 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
17032 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
17033 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
17034 ///
17035 /// Into:
17036 /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
17037 /// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0
17038 /// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1
17039 bool ARMTargetLowering::lowerInterleavedLoad(
17040  LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
17041  ArrayRef<unsigned> Indices, unsigned Factor) const {
17042  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
17043  "Invalid interleave factor");
17044  assert(!Shuffles.empty() && "Empty shufflevector input");
17045  assert(Shuffles.size() == Indices.size() &&
17046  "Unmatched number of shufflevectors and indices");
17047 
17048  VectorType *VecTy = Shuffles[0]->getType();
17049  Type *EltTy = VecTy->getVectorElementType();
17050 
17051  const DataLayout &DL = LI->getModule()->getDataLayout();
17052 
17053  // Skip if we do not have NEON and skip illegal vector types. We can
17054  // "legalize" wide vector types into multiple interleaved accesses as long as
17055  // the vector types are divisible by 128.
17056  if (!isLegalInterleavedAccessType(Factor, VecTy, DL))
17057  return false;
17058 
17059  unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
17060 
17061  // A pointer vector can not be the return type of the ldN intrinsics. Need to
17062  // load integer vectors first and then convert to pointer vectors.
17063  if (EltTy->isPointerTy())
17064  VecTy =
17065  VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
17066 
17067  IRBuilder<> Builder(LI);
17068 
17069  // The base address of the load.
17070  Value *BaseAddr = LI->getPointerOperand();
17071 
17072  if (NumLoads > 1) {
17073  // If we're going to generate more than one load, reset the sub-vector type
17074  // to something legal.
17075  VecTy = VectorType::get(VecTy->getVectorElementType(),
17076  VecTy->getVectorNumElements() / NumLoads);
17077 
17078  // We will compute the pointer operand of each load from the original base
17079  // address using GEPs. Cast the base address to a pointer to the scalar
17080  // element type.
17081  BaseAddr = Builder.CreateBitCast(
17082  BaseAddr, VecTy->getVectorElementType()->getPointerTo(
17083  LI->getPointerAddressSpace()));
17084  }
17085 
17086  assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
17087 
17088  auto createLoadIntrinsic = [&](Value *BaseAddr) {
17089  if (Subtarget->hasNEON()) {
17090  Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
17091  Type *Tys[] = {VecTy, Int8Ptr};
17092  static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
17093  Intrinsic::arm_neon_vld3,
17094  Intrinsic::arm_neon_vld4};
17095  Function *VldnFunc =
17096  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
17097 
17098  SmallVector<Value *, 2> Ops;
17099  Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
17100  Ops.push_back(Builder.getInt32(LI->getAlignment()));
17101 
17102  return Builder.CreateCall(VldnFunc, Ops, "vldN");
17103  } else {
17104  assert((Factor == 2 || Factor == 4) &&
17105  "expected interleave factor of 2 or 4 for MVE");
17106  Intrinsic::ID LoadInts =
17107  Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
17108  Type *VecEltTy = VecTy->getVectorElementType()->getPointerTo(
17109  LI->getPointerAddressSpace());
17110  Type *Tys[] = {VecTy, VecEltTy};
17111  Function *VldnFunc =
17112  Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
17113 
17114  SmallVector<Value *, 2> Ops;
17115  Ops.push_back(Builder.CreateBitCast(BaseAddr, VecEltTy));
17116  return Builder.CreateCall(VldnFunc, Ops, "vldN");
17117  }
17118  };
17119 
17120  // Holds sub-vectors extracted from the load intrinsic return values. The
17121  // sub-vectors are associated with the shufflevector instructions they will
17122  // replace.
17123  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
17124 
17125  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
17126  // If we're generating more than one load, compute the base address of
17127  // subsequent loads as an offset from the previous.
17128  if (LoadCount > 0)
17129  BaseAddr =
17130  Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
17131  VecTy->getVectorNumElements() * Factor);
17132 
17133  CallInst *VldN = createLoadIntrinsic(BaseAddr);
17134 
17135  // Replace uses of each shufflevector with the corresponding vector loaded
17136  // by ldN.
17137  for (unsigned i = 0; i < Shuffles.size(); i++) {
17138  ShuffleVectorInst *SV = Shuffles[i];
17139  unsigned Index = Indices[i];
17140 
17141  Value *SubVec = Builder.CreateExtractValue(VldN, Index);
17142 
17143  // Convert the integer vector to pointer vector if the element is pointer.
17144  if (EltTy->isPointerTy())
17145  SubVec = Builder.CreateIntToPtr(
17146  SubVec, VectorType::get(SV->getType()->getVectorElementType(),
17147  VecTy->getVectorNumElements()));
17148 
17149  SubVecs[SV].push_back(SubVec);
17150  }
17151  }
17152 
17153  // Replace uses of the shufflevector instructions with the sub-vectors
17154  // returned by the load intrinsic. If a shufflevector instruction is
17155  // associated with more than one sub-vector, those sub-vectors will be
17156  // concatenated into a single wide vector.
17157  for (ShuffleVectorInst *SVI : Shuffles) {
17158  auto &SubVec = SubVecs[SVI];
17159  auto *WideVec =
17160  SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
17161  SVI->replaceAllUsesWith(WideVec);
17162  }
17163 
17164  return true;
17165 }
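// For illustration, a factor-2 load whose wide vector is <16 x i32> has
// <8 x i32> shuffle results, which are not a single legal vldN type; the code
// above then emits two vld2 calls on <4 x i32> pieces and concatenates the
// two halves of each result back into the <8 x i32> shuffle results.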
17166 
17167 /// Lower an interleaved store into a vstN intrinsic.
17168 ///
17169 /// E.g. Lower an interleaved store (Factor = 3):
17170 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
17171 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
17172 /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
17173 ///
17174 /// Into:
17175 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
17176 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
17177 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
17178 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
17179 ///
17180 /// Note that the new shufflevectors will be removed and we'll only generate one
17181 /// vst3 instruction in CodeGen.
17182 ///
17183 /// Example for a more general valid mask (Factor 3). Lower:
17184 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
17185 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
17186 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
17187 ///
17188 /// Into:
17189 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
17190 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
17191 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
17192 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
17193 bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
17194  ShuffleVectorInst *SVI,
17195  unsigned Factor) const {
17196  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
17197  "Invalid interleave factor");
17198 
17199  VectorType *VecTy = SVI->getType();
17200  assert(VecTy->getVectorNumElements() % Factor == 0 &&
17201  "Invalid interleaved store");
17202 
17203  unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
17204  Type *EltTy = VecTy->getVectorElementType();
17205  VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
17206 
17207  const DataLayout &DL = SI->getModule()->getDataLayout();
17208 
17209  // Skip if we do not have NEON and skip illegal vector types. We can
17210  // "legalize" wide vector types into multiple interleaved accesses as long as
17211  // the vector types are divisible by 128.
17212  if (!isLegalInterleavedAccessType(Factor, SubVecTy, DL))
17213  return false;
17214 
17215  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
17216 
17217  Value *Op0 = SVI->getOperand(0);
17218  Value *Op1 = SVI->getOperand(1);
17219  IRBuilder<> Builder(SI);
17220 
17221  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
17222  // vectors to integer vectors.
17223  if (EltTy->isPointerTy()) {
17224  Type *IntTy = DL.getIntPtrType(EltTy);
17225 
17226  // Convert to the corresponding integer vector.
17227  Type *IntVecTy =
17228  VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
17229  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
17230  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
17231 
17232  SubVecTy = VectorType::get(IntTy, LaneLen);
17233  }
17234 
17235  // The base address of the store.
17236  Value *BaseAddr = SI->getPointerOperand();
17237 
17238  if (NumStores > 1) {
17239  // If we're going to generate more than one store, reset the lane length
17240  // and sub-vector type to something legal.
17241  LaneLen /= NumStores;
17242  SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
17243 
17244  // We will compute the pointer operand of each store from the original base
17245  // address using GEPs. Cast the base address to a pointer to the scalar
17246  // element type.
17247  BaseAddr = Builder.CreateBitCast(
17248  BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
17249  SI->getPointerAddressSpace()));
17250  }
17251 
17252  assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
17253 
17254  auto Mask = SVI->getShuffleMask();
17255 
17256  auto createStoreIntrinsic = [&](Value *BaseAddr,
17257  SmallVectorImpl<Value *> &Shuffles) {
17258  if (Subtarget->hasNEON()) {
17259  static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
17260  Intrinsic::arm_neon_vst3,
17261  Intrinsic::arm_neon_vst4};
17262  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
17263  Type *Tys[] = {Int8Ptr, SubVecTy};
17264 
17265  Function *VstNFunc = Intrinsic::getDeclaration(
17266  SI->getModule(), StoreInts[Factor - 2], Tys);
17267 
17268  SmallVector<Value *, 6> Ops;
17269  Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
17270  for (auto S : Shuffles)
17271  Ops.push_back(S);
17272  Ops.push_back(Builder.getInt32(SI->getAlignment()));
17273  Builder.CreateCall(VstNFunc, Ops);
17274  } else {
17275  assert((Factor == 2 || Factor == 4) &&
17276  "expected interleave factor of 2 or 4 for MVE");
17277  Intrinsic::ID StoreInts =
17278  Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
17279  Type *EltPtrTy = SubVecTy->getVectorElementType()->getPointerTo(
17280  SI->getPointerAddressSpace());
17281  Type *Tys[] = {EltPtrTy, SubVecTy};
17282  Function *VstNFunc =
17283  Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
17284 
17285  SmallVector<Value *, 6> Ops;
17286  Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
17287  for (auto S : Shuffles)
17288  Ops.push_back(S);
17289  for (unsigned F = 0; F < Factor; F++) {
17290  Ops.push_back(Builder.getInt32(F));
17291  Builder.CreateCall(VstNFunc, Ops);
17292  Ops.pop_back();
17293  }
17294  }
17295  };
17296 
17297  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
17298  // If we're generating more than one store, we compute the base address of
17299  // subsequent stores as an offset from the previous.
17300  if (StoreCount > 0)
17301  BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
17302  BaseAddr, LaneLen * Factor);
17303 
17304  SmallVector<Value *, 4> Shuffles;
17305 
17306  // Split the shufflevector operands into sub vectors for the new vstN call.
17307  for (unsigned i = 0; i < Factor; i++) {
17308  unsigned IdxI = StoreCount * LaneLen * Factor + i;
17309  if (Mask[IdxI] >= 0) {
17310  Shuffles.push_back(Builder.CreateShuffleVector(
17311  Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
17312  } else {
17313  unsigned StartMask = 0;
17314  for (unsigned j = 1; j < LaneLen; j++) {
17315  unsigned IdxJ = StoreCount * LaneLen * Factor + j;
17316  if (Mask[IdxJ * Factor + IdxI] >= 0) {
17317  StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
17318  break;
17319  }
17320  }
17321  // Note: If all elements in a chunk are undefs, StartMask=0!
17322  // Note: Filling undef gaps with random elements is ok, since
17323  // those elements were being written anyway (with undefs).
17324  // In the case of all undefs we're defaulting to using elems from 0
17325  // Note: StartMask cannot be negative, it's checked in
17326  // isReInterleaveMask
17327  Shuffles.push_back(Builder.CreateShuffleVector(
17328  Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
17329  }
17330  }
17331 
17332  createStoreIntrinsic(BaseAddr, Shuffles);
17333  }
17334  return true;
17335 }
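// For illustration, on MVE a factor-2 store is emitted as one vst2q call per
// stage, with the trailing i32 operand selecting the stage (value names are
// hypothetical, overloaded intrinsic suffixes omitted):
//   call void @llvm.arm.mve.vst2q(i32* %addr, <4 x i32> %v0, <4 x i32> %v1, i32 0)
//   call void @llvm.arm.mve.vst2q(i32* %addr, <4 x i32> %v0, <4 x i32> %v1, i32 1)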
17336 
17337 enum HABaseType {
17338  HA_UNKNOWN = 0,
17339  HA_FLOAT,
17340  HA_DOUBLE,
17341  HA_VECT64,
17342  HA_VECT128
17343 };
17344 
17345 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
17346  uint64_t &Members) {
17347  if (auto *ST = dyn_cast<StructType>(Ty)) {
17348  for (unsigned i = 0; i < ST->getNumElements(); ++i) {
17349  uint64_t SubMembers = 0;
17350  if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
17351  return false;
17352  Members += SubMembers;
17353  }
17354  } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
17355  uint64_t SubMembers = 0;
17356  if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
17357  return false;
17358  Members += SubMembers * AT->getNumElements();
17359  } else if (Ty->isFloatTy()) {
17360  if (Base != HA_UNKNOWN && Base != HA_FLOAT)
17361  return false;
17362  Members = 1;
17363  Base = HA_FLOAT;
17364  } else if (Ty->isDoubleTy()) {
17365  if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
17366  return false;
17367  Members = 1;
17368  Base = HA_DOUBLE;
17369  } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
17370  Members = 1;
17371  switch (Base) {
17372  case HA_FLOAT:
17373  case HA_DOUBLE:
17374  return false;
17375  case HA_VECT64:
17376  return VT->getBitWidth() == 64;
17377  case HA_VECT128:
17378  return VT->getBitWidth() == 128;
17379  case HA_UNKNOWN:
17380  switch (VT->getBitWidth()) {
17381  case 64:
17382  Base = HA_VECT64;
17383  return true;
17384  case 128:
17385  Base = HA_VECT128;
17386  return true;
17387  default:
17388  return false;
17389  }
17390  }
17391  }
17392 
17393  return (Members > 0 && Members <= 4);
17394 }
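// E.g. struct { float x, y, z; } is a homogeneous aggregate (Base = HA_FLOAT,
// Members = 3), and [2 x <2 x i32>] is one with Base = HA_VECT64; a struct
// mixing float and double fields, or one with more than four members, is not.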
17395 
17396 /// Return the correct alignment for the current calling convention.
17397 Align ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
17398  DataLayout DL) const {
17399  const Align ABITypeAlign(DL.getABITypeAlignment(ArgTy));
17400  if (!ArgTy->isVectorTy())
17401  return ABITypeAlign;
17402 
17403  // Avoid over-aligning vector parameters. It would require realigning the
17404  // stack and waste space for no real benefit.
17405  return std::min(ABITypeAlign, DL.getStackAlignment());
17406 }
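// E.g. with the usual 8-byte AAPCS stack alignment, a vector argument whose
// ABI type alignment is 16 bytes would be capped at 8 bytes here rather than
// forcing the callee to realign its stack.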
17407 
17408 /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
17409 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
17410 /// passing according to AAPCS rules.
17411 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
17412  Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
17413  if (getEffectiveCallingConv(CallConv, isVarArg) !=
17414  CallingConv::ARM_AAPCS_VFP)
17415  return false;
17416 
17417  HABaseType Base = HA_UNKNOWN;
17418  uint64_t Members = 0;
17419  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
17420  LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
17421 
17422  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
17423  return IsHA || IsIntArray;
17424 }
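// E.g. under AAPCS-VFP, struct { double d[3]; } (a homogeneous aggregate) and
// [4 x i32] both return true here, so their pieces are passed in consecutive
// registers without padding in between.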
17425 
17426 unsigned ARMTargetLowering::getExceptionPointerRegister(
17427  const Constant *PersonalityFn) const {
17428  // Platforms which do not use SjLj EH may return values in these registers
17429  // via the personality function.
17430  return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
17431 }
17432 
17433 unsigned ARMTargetLowering::getExceptionSelectorRegister(
17434  const Constant *PersonalityFn) const {
17435  // Platforms which do not use SjLj EH may return values in these registers
17436  // via the personality function.
17437  return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
17438 }
17439 
17440 void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
17441  // Update IsSplitCSR in ARMFunctionInfo.
17442  ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
17443  AFI->setIsSplitCSR(true);
17444 }
17445 
17446 void ARMTargetLowering::insertCopiesSplitCSR(
17447  MachineBasicBlock *Entry,
17448  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
17449  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
17450  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
17451  if (!IStart)
17452  return;
17453 
17454  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
17455  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
17456  MachineBasicBlock::iterator MBBI = Entry->begin();
17457  for (const MCPhysReg *I = IStart; *I; ++I) {
17458  const TargetRegisterClass *RC = nullptr;
17459  if (ARM::GPRRegClass.contains(*I))
17460  RC = &ARM::GPRRegClass;
17461  else if (ARM::DPRRegClass.contains(*I))
17462  RC = &ARM::DPRRegClass;
17463  else
17464  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
17465 
17466  Register NewVR = MRI->createVirtualRegister(RC);
17467  // Create copy from CSR to a virtual register.
17468  // FIXME: this currently does not emit CFI pseudo-instructions, it works
17469  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
17470  // nounwind. If we want to generalize this later, we may need to emit
17471  // CFI pseudo-instructions.
17472  assert(Entry->getParent()->getFunction().hasFnAttribute(
17473  Attribute::NoUnwind) &&
17474  "Function should be nounwind in insertCopiesSplitCSR!");
17475  Entry->addLiveIn(*I);
17476  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
17477  .addReg(*I);
17478 
17479  // Insert the copy-back instructions right before the terminator.
17480  for (auto *Exit : Exits)
17481  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
17482  TII->get(TargetOpcode::COPY), *I)
17483  .addReg(NewVR);
17484  }
17485 }
17486 
17487 void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
17488  MF.getFrameInfo().computeMaxCallFrameSize(MF);
17489  TargetLoweringBase::finalizeLowering(MF);
17490 }
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG)
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs...
static SDValue PerformVDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static bool isValidMVECond(unsigned CC, bool IsFloat)
bool hasV5TEOps() const
Definition: ARMSubtarget.h:570
bool isMachineConstantPoolEntry() const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
Type * getVectorElementType() const
Definition: Type.h:376
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
void setFrameAddressIsTaken(bool T)
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:625
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:599
bool useMovt() const
Value * getValueOperand()
Definition: Instructions.h:426
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:271
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
static MVT getIntegerVT(unsigned BitWidth)
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
const MachineInstrBuilder & add(const MachineOperand &MO) const
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:943
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:652
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
EVT getValueType() const
Return the ValueType of the referenced return value.
bool useFPVFMx64() const
Definition: ARMSubtarget.h:643
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:71
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:725
class_match< UndefValue > m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:87
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type...
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1866
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
iterator_range< use_iterator > uses()
Definition: Value.h:375
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
bool hasCallSiteLandingPad(MCSymbol *Sym)
Return true if the landing pad Eh symbol has an associated call site.
static bool isConstant(const MachineInstr &MI)
bool isUndef() const
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static const APInt * isPowerOf2Constant(SDValue V)
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
bool hasBaseDSP() const
Definition: ARMSubtarget.h:681
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
const GlobalValue * getGlobal() const
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:185
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1620
Value * getSDagStackGuard(const Module &M) const override
Return the variable that&#39;s previously inserted by insertSSPDeclarations, if any, otherwise return nul...
#define R4(n)
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:566
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
LLVMContext & Context
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:426
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
MachineBasicBlock * getMBB() const
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:576
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it&#39;s not CSE&#39;d)...
Definition: SelectionDAG.h:895
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
Atomic ordering constants.
int getFunctionContextIndex() const
Return the index for the function context object.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the &#39;hasPostISelHook&#39; flag...
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements...
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:412
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where &#39;op&#39; is a valid SetCC operation.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:708
bool isThumb() const
Definition: ARMSubtarget.h:765
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static bool IsMasked(Instruction *I)
bool hasDivideInThumbMode() const
Definition: ARMSubtarget.h:626
Register getLocReg() const
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG)
Align getStackAlignment() const
Definition: DataLayout.h:268
std::pair< unsigned, const TargetRegisterClass * > RCPair
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:140
const unsigned char * bytes_end() const
Definition: StringRef.h:129
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static bool IsVUZPShuffleNode(SDNode *N)
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:238
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0...
Definition: ISDOpcodes.h:663
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:320
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:421
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:262
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
unsigned EnableDebugEntryValues
Emit debug info about parameter&#39;s entry values.
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:66
iterator begin() const
Definition: ArrayRef.h:146
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL.
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:624
An instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:547
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
bool isLittle() const
Definition: ARMSubtarget.h:805
const SDValue & getBasePtr() const
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
EABI EABIVersion
EABIVersion - This flag specifies the EABI version.
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:400
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
ARMConstantPoolValue - ARM specific constantpool value.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each element has been zero/sign-...
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
static cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:188
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:366
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Y = RRC X, rotate right via carry.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:146
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal, non-volatile loads.
const SDValue & getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target, for a load/store of the specified type.
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:760
SDVTList getVTList() const
This class represents a function call, abstracting a target machine&#39;s calling convention.
unsigned Reg
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
EK_Inline - Jump table entries are emitted inline at their point of use.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:664
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElement(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:255
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
Register getFrameRegister(const MachineFunction &MF) const override
Global Offset Table, Thread Pointer Offset.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static MVT getFloatingPointVT(unsigned BitWidth)
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:252
bool hasV7Ops() const
Definition: ARMSubtarget.h:575
const std::string & getAsmString() const
Definition: InlineAsm.h:80
const SDValue & getChain() const
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
Function Alias Analysis Results
This instruction constructs a fixed permutation of two input vectors.
bool isTargetCOFF() const
Definition: ARMSubtarget.h:709
unsigned getValNo() const
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
unsigned getAlignment() const
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending load, or BUILD_VECTOR with extended elements, return the unextended value.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:323
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top)
auto count_if(R &&Range, UnaryPredicate P) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1256
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1294
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:262
bool useNaClTrap() const
Definition: ARMSubtarget.h:676
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
unsigned second
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:265
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1182
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
Definition: Module.h:391
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:865
static uint32_t Concat[]
static bool isLegalMVEShuffleOp(unsigned PFEntry)
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
bool hasV6Ops() const
Definition: ARMSubtarget.h:571
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
void setIsDead(bool Val=true)
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
TypeSize getScalarValueSizeInBits() const
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select&#39;s if you just have operands and don&#39;t want to check...
ThreeOps_match< V1_t, V2_t, Mask_t, Instruction::ShuffleVector > m_ShuffleVector(const V1_t &v1, const V2_t &v2, const Mask_t &m)
Matches ShuffleVectorInst.
An instruction for reading from memory.
Definition: Instructions.h:169
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1...
bool isThumb1Only() const
Definition: ARMSubtarget.h:767
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:181
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:446
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:730
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
Definition: ISDOpcodes.h:445
const SDNodeFlags getFlags() const
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:710
SDNode * getNode() const
get the SDNode which holds the desired result
#define R2(n)
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:624
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
bool hasAcquireRelease() const
Definition: ARMSubtarget.h:631
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
#define op(i)
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Same for subtraction.
Definition: ISDOpcodes.h:255
Global Offset Table, PC Relative.
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:50
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:179
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
Thread Pointer Offset.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:39
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst *> Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool isTargetHardFloat() const
uint64_t High
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:441
static bool isThumb(const MCSubtargetInfo &STI)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:414
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:842
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset...
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:496
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static Optional< bool > isBigEndian(const ArrayRef< int64_t > ByteOffsets, int64_t FirstOffset)
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:370
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1566
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
SDValue getExternalSymbol(const char *Sym, EVT VT)
return AArch64::GPR64RegClass contains(Reg)
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:160
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool isMemLoc() const
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
MachineJumpTableInfo * getOrCreateJumpTableInfo(unsigned JTEntryKind)
getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it does already exist...
bool genExecuteOnly() const
Definition: ARMSubtarget.h:680
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode *> &Visited, SmallVectorImpl< const SDNode *> &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
bool needsCustom() const
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:526
bool hasV8MBaselineOps() const
Definition: ARMSubtarget.h:582
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively...
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic...
Definition: ISDOpcodes.h:115
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:611
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:211
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1246
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:156
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
bool isTargetELF() const
Definition: ARMSubtarget.h:710
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:195
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:517
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1689
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
The address of a basic block.
Definition: Constants.h:840
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:48
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "...
Definition: ARMBaseInfo.h:259
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
A description of a memory reference used in the backend.
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:266
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:235
const HexagonInstrInfo * TII
bool hasARMOps() const
Definition: ARMSubtarget.h:606
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic...
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:460
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
Shift and rotation operations.
Definition: ISDOpcodes.h:471
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:429
static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:140
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:245
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth...
Definition: ISDOpcodes.h:454
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
static SDValue findMUL_LOHI(SDValue V)
static bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v...
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:659
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, int64_t Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object...
Definition: SelectionDAG.h:866
CallLoweringInfo & setChain(SDValue InChain)
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:192
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:171
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
op_iterator op_end() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest, 2 Round to +inf, 3 Round to -inf.
Definition: ISDOpcodes.h:607
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:660
This file contains the simple types necessary to represent the attributes associated with functions a...
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SimpleValueType SimpleTy
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
The memory access is dereferenceable (i.e., doesn't trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) For double-word atomic operations: ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi) ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi) These correspond to the atomicrmw instruction.
Definition: ISDOpcodes.h:871
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:426
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:485
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
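A hedged sketch of how a backend typically uses this hook inside its TargetLowering constructor; the opcode/type choices below are illustrative, not ARM's actual configuration:

  setOperationAction(ISD::CTPOP, MVT::i32, Expand);  // legalizer rewrites it
  setOperationAction(ISD::SDIV,  MVT::i32, LibCall); // emit a runtime call
  setOperationAction(ISD::ADD,   MVT::i32, Legal);   // natively selectable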
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:423
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
isZeroExtended - Check if a node is a vector value that is zero-extended or a constant BUILD_VECTOR w...
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:74
uint64_t getNumElements() const
For scalable vectors, this will return the minimum number of elements in the vector.
Definition: DerivedTypes.h:398
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
SmallVectorImpl< unsigned > & getCallSiteLandingPad(MCSymbol *Sym)
Get the call site indexes for a landing pad EH symbol.
bool hasPerfMon() const
Definition: ARMSubtarget.h:647
LocInfo getLocInfo() const
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:994
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
static StructType * get(LLVMContext &Context, ArrayRef< Type *> Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:346
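A minimal creation sketch (the context and field types are hypothetical):

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"

  llvm::LLVMContext Ctx;
  llvm::StructType *ST = llvm::StructType::get(
      Ctx, {llvm::Type::getInt32Ty(Ctx), llvm::Type::getFloatTy(Ctx)});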
This file implements a class to represent arbitrary precision integral constant values and operations...
void SplitString(StringRef Source, SmallVectorImpl< StringRef > &OutFragments, StringRef Delimiters=" \t\n\v\f\r")
SplitString - Split up the specified string according to the specified delimiters, appending the result fragments to the output list.
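For illustration, a sketch with a hypothetical input string:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringExtras.h"

  llvm::SmallVector<llvm::StringRef, 4> Parts;
  llvm::SplitString("vadd vsub vmul", Parts); // Parts == {"vadd", "vsub", "vmul"}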
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static mvt_range integer_fixedlen_vector_valuetypes()
unsigned getArgRegsSaveSize() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
This class is used to represent an MSTORE node.
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
SmallVector< ISD::InputArg, 32 > Ins
AtomicOrdering
Atomic ordering for LLVM&#39;s memory model.
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:756
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
static const unsigned PerfectShuffleTable[6561+1]
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2096
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1590
int64_t getSExtValue() const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:435
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only...
Definition: ISDOpcodes.h:353
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type...
static bool isIdentityMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2101
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1632
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:423
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
Constant * createSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:420
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:539
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool hasDSP() const
Definition: ARMSubtarget.h:675
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here...
Definition: ISDOpcodes.h:119
static bool isZeroVector(SDValue N)
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
SDValue getRegisterMask(const uint32_t *RegMask)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:424
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:126
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:490
void setMinFunctionAlignment(Align Alignment)
Set the target&#39;s minimum function alignment.
static bool isSRL16(const SDValue &Op)
This contains information for each constraint that we are lowering.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:202
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
static bool isStore(int Opcode)
static SDValue PerformVCMPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
SmallVector< ISD::OutputArg, 32 > Outs
bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand ABS nodes.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: CallSite.h:279
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:622
bool hasFPAO() const
Definition: ARMSubtarget.h:651
bool hasV6T2Ops() const
Definition: ARMSubtarget.h:574
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:996
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:151
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
CallLoweringInfo & setZExtResult(bool Value=true)
bool isTargetDarwin() const
Definition: ARMSubtarget.h:700
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:911
bool hasV8_1MMainlineOps() const
Definition: ARMSubtarget.h:584
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
An instruction for storing to memory.
Definition: Instructions.h:331
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
void setReg(Register Reg)
Change the register this operand corresponds to.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:1047
op_iterator op_begin() const
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:347
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2009
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:604
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:808
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static EVT getVectorTyFromPredicateVector(EVT VT)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool hasFP16() const
Definition: ARMSubtarget.h:688
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:71
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1080
bool isStrongerThanMonotonic(AtomicOrdering ao)
static const MCPhysReg GPRArgRegs[]
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2111
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:152
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1112
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
MVT getVectorElementType() const
std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type cast(const Y &Val)
Definition: Casting.h:249
Value * getOperand(unsigned i) const
Definition: User.h:169
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
static SDValue FindBFIToCombineWith(SDNode *N)
Class to represent pointers.
Definition: DerivedTypes.h:579
unsigned getByValSize() const
UNDEF - An undefined node.
Definition: ISDOpcodes.h:179
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:585
bool isReadOnly(const GlobalValue *GV) const
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:139
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:381
unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1387
TargetInstrInfo - Interface to description of machine instruction set.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool hasDivideInARMMode() const
Definition: ARMSubtarget.h:627
AddrOpc getAM2Op(unsigned AM2Opc)
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:703
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:652
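A one-line sketch with hypothetical arguments:

  #include "llvm/ADT/APInt.h"

  llvm::APInt Hi = llvm::APInt::getHighBitsSet(32, 4); // 0xF0000000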
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:231
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static EVT getExtensionTo64Bits(const EVT &OrigVT)
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:539
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
The memory access is volatile.
void setReturnRegsCount(unsigned s)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:661
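A quick sketch of the result (the largest power of two dividing both inputs):

  #include "llvm/Support/MathExtras.h"

  uint64_t A = llvm::MinAlign(16, 24); // == 8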
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
void setPrefLoopAlignment(Align Alignment)
Set the target&#39;s preferred loop alignment.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:769
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:619
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1174
const SDValue & getBasePtr() const
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
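A hedged sketch; the helper name and the constraint-letter mapping are made up for illustration:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  static int classifyConstraint(llvm::StringRef C) {
    return llvm::StringSwitch<int>(C)
        .Case("r", 0)   // general-purpose register
        .Case("w", 1)   // FP/vector register
        .Default(-1);   // unknown constraint
  }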
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:435
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:167
static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:400
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:687
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:86
bool preferISHSTBarriers() const
Definition: ARMSubtarget.h:656
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
bool useSoftFloat() const override
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:379
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
isSignExtended - Check if a node is a vector value that is sign-extended or a constant BUILD_VECTOR w...
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1715
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:465
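Two data points as a sketch:

  #include "llvm/Support/MathExtras.h"

  bool T = llvm::isPowerOf2_32(64); // true
  bool F = llvm::isPowerOf2_32(0);  // false: zero is excluded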
An array constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values (i.e. ConstantInt/ConstantFP).
Definition: Constants.h:690
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
Machine Value Type.
Value * getCalledValue() const
Definition: InstrTypes.h:1298
Value * concatenateVectors(IRBuilder<> &Builder, ArrayRef< Value *> Vecs)
Concatenate a list of vectors.
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
bool isOptionalDef() const
Set if this operand is an optional def.
Definition: MCInstrDesc.h:104
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass...
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:63
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Simple binary floating point operators.
Definition: ISDOpcodes.h:295
KnownBits zext(unsigned BitWidth, bool ExtendedBitsAreKnownZero) const
Extends the underlying known Zero and One bits.
Definition: KnownBits.h:135
bool isMClass() const
Definition: ARMSubtarget.h:770
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any uses of the indicated value.
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:275
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1205
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:158
This is an important base class in LLVM.
Definition: Constant.h:41
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:65
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:789
bool isFloatingPointOperation() const
Definition: Instructions.h:842
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1022
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:386
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isSHL16(const SDValue &Op)
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
ARM_AAPCS - ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:110
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:486
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:702
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:883
static bool isReverseMask(ArrayRef< int > M, EVT VT)
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
bool isAcquireOrStronger(AtomicOrdering ao)
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
ShuffleOpCodes
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:592
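A one-line sketch with hypothetical arguments:

  #include "llvm/ADT/APInt.h"

  llvm::APInt Bit = llvm::APInt::getOneBitSet(32, 7); // 32-bit value 0x80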
bool hasFPRegs16() const
Definition: ARMSubtarget.h:588
const SDValue & getOffset() const
CombineLevel
Definition: DAGCombine.h:15
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static mvt_range fp_valuetypes()
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:765
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:470
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:165
This class provides iterator support for SDUse operands that use a specific SDNode.
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
bool useMulOps() const
Definition: ARMSubtarget.h:637
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:144
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool CombineTo(SDValue O, SDValue N)
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:314
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:828
const APInt & getAPIntValue() const
const Triple & getTargetTriple() const
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
Value * getPointerOperand()
Definition: Instructions.h:295
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:58
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:831
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM...
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic...
arg_iterator arg_begin()
Definition: Function.h:706
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
self_iterator getIterator()
Definition: ilist_node.h:81
unsigned getOriginalAlignment() const
Returns alignment and volatility of the memory access.
The memory access is non-temporal.
bool hasVFP2Base() const
Definition: ARMSubtarget.h:608
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
const SDValue & get() const
If implicit conversion to SDValue doesn't work, the get() method returns the SDValue.
Class to represent integer types.
Definition: DerivedTypes.h:40
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:324
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:799
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:780
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
const ARMSubtarget * getSubtarget() const
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:634
const MachineInstrBuilder & addFrameIndex(int Idx) const
unsigned getInRegsParamsProcessed() const
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:477
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE. ...
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:857
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:223
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:610
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isCheapToSpeculateCttz() const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
bool isAPCS_ABI() const
std::vector< ArgListEntry > ArgListTy
bool hasVFP3Base() const
Definition: ARMSubtarget.h:609
Extended Value Type.
Definition: ValueTypes.h:34
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:434
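A sketch of the boundary behavior:

  #include "llvm/Support/MathExtras.h"

  bool A = llvm::isIntN(8, 127); // true: fits in a signed 8-bit value
  bool B = llvm::isIntN(8, 128); // false: out of range for i8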
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:298
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least, stopping at the first 1...
Definition: MathExtras.h:225
bool useSjLjEH() const
Definition: ARMSubtarget.h:677
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool isPositionIndependent() const
This structure contains all information that is necessary for lowering calls.
size_t size() const
Definition: SmallVector.h:52
static bool isLTorLE(ISD::CondCode CC)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:591
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1203
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:433
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:224
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
bool isVolatile() const
const TargetMachine & getTargetMachine() const
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2005
const std::string & getConstraintString() const
Definition: InlineAsm.h:81
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:987
bool isLegalInterleavedAccessType(unsigned Factor, VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
void dump() const
Definition: AsmWriter.cpp:4470
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero...
The memory access writes data.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isReleaseOrStronger(AtomicOrdering ao)
bool use_empty() const
Return true if there are no uses of this node.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
size_type size() const
Definition: SmallPtrSet.h:92
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:212
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:266
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:51
void dump() const
Dump this node, for debugging.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:426
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:666
Iterator for intrusive lists based on ilist_node.
void setPromotedConstpoolIncrease(int Sz)
CCState - This class holds information needed while lowering arguments and return values...
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:650
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:443
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
bool hasFP64() const
Definition: ARMSubtarget.h:646
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
bool useSoftFloat() const
Definition: ARMSubtarget.h:764
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:567
ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:113
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:393
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:611
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:267
bool isTargetAEABI() const
Definition: ARMSubtarget.h:720
This struct is a compact representation of a valid (power of two) or undefined (0) alignment...
Definition: Alignment.h:117
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:512
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:223
ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete, but still used on some target...
Definition: CallingConv.h:106
const SDValue & getMask() const
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
MachineOperand class - Representation of each machine instruction operand.
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Module.h This file contains the declarations for the Module class.
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits...
bool hasVFP4Base() const
Definition: ARMSubtarget.h:610
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
bool hasMVEFloatOps() const
Definition: ARMSubtarget.h:586
bool isFPBrccSlow() const
Definition: ARMSubtarget.h:645
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:830
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:758
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Provides information about what library functions are available for the current target.
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo)
void dump() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isTargetAndroid() const
Definition: ARMSubtarget.h:751
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1027
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
iterator end() const
Definition: ArrayRef.h:147
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:755
bool hasMPExtension() const
Definition: ARMSubtarget.h:674
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:702
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
bool hasNEON() const
Definition: ARMSubtarget.h:612
const SDValue & getPassThru() const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:644
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:474
bool useFPVFMx() const
Definition: ARMSubtarget.h:639
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:635
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:355
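A small IRBuilder sketch; assumes an IRBuilder<> named B already positioned at an insertion point:

  llvm::Value *C = B.getInt32(42);                  // i32 42
  llvm::Value *W = B.CreateZExt(C, B.getInt64Ty()); // widened to i64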
This is an abstract virtual class for memory operations.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
const Constant * getConstVal() const
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2547
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
CallLoweringInfo & setSExtResult(bool Value=true)
unsigned getAM2Offset(unsigned AM2Opc)
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:704
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override
Return true if SHIFT instructions should be expanded to SHIFT_PARTS instructions, and false if a libr...
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:250
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:143
Represents one node in the SelectionDAG.
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
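A minimal sketch (DAG, dl, and an i32 SDValue X assumed in scope):
 // getNOT builds (xor X, -1), i.e. a bitwise complement of X.
 SDValue NotX = DAG.getNOT(dl, X, MVT::i32);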
int64_t getImm() const
KnownBits sext(unsigned BitWidth) const
Sign extends the underlying known Zero and One bits.
Definition: KnownBits.h:145
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:703
const Function & getFunction() const
Return the LLVM function that this machine code represents.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
unsigned logBase2() const
Definition: APInt.h:1805
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static mvt_range integer_valuetypes()
The access may modify the value stored in memory.
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:585
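A small self-contained sketch of the floor semantics:
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
 void log2Demo() {
   assert(llvm::Log2_32(32) == 5); // exact power of two
   assert(llvm::Log2_32(33) == 5); // floor: rounds down, not up
 }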
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:174
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:566
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:163
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
static SDValue PerformABSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
size_t use_size() const
Return the number of uses of this node.
unsigned getPreferredAlignment(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition: DataLayout.cpp:837
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to...
Class to represent vector types.
Definition: DerivedTypes.h:432
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:55
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT...
Definition: ValueTypes.h:73
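A minimal sketch, assuming an LLVMContext &Context is in scope:
 EVT VecVT = EVT::getVectorVT(Context, MVT::i32, 4);
 // Here VecVT.isSimple() is true and VecVT.getSimpleVT() == MVT::v4i32.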
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
EVT getMemoryVT() const
Return the type of the in-memory value.
Target - Wrapper for Target specific information.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:69
unsigned getByValAlign() const
CodeModel::Model getCodeModel() const
Returns the code model.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
Instruction * makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const
iterator_range< use_iterator > uses()
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:481
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:101
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:243
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:468
static use_iterator use_end()
Align getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override
Return the correct alignment for the current calling convention.
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
iterator_range< user_iterator > users()
Definition: Value.h:420
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:529
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1300
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:532
bool isTailCall() const
bool genLongCalls() const
Definition: ARMSubtarget.h:679
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:467
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:366
int getMaskElt(unsigned Idx) const
bool isROPI() const
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:283
void setArgumentStackSize(unsigned size)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool hasVMLxForwarding() const
Definition: ARMSubtarget.h:644
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
bool isEmpty() const
Returns true if there are no itineraries.
Flags
Flags values. These may be or'd together.
bool isCheapToSpeculateCtlz() const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:271
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
bool isUnknown() const
Returns true if we don&#39;t know any bits.
Definition: KnownBits.h:62
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2129
Section Relative (Windows TLS)
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:696
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
bool isThumb2() const
Definition: ARMSubtarget.h:768
Representation of each machine instruction.
Definition: MachineInstr.h:63
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:785
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:698
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool useFPVFMx16() const
Definition: ARMSubtarget.h:642
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
Represents a use of a SDNode.
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
bool hasRetAddrStack() const
Definition: ARMSubtarget.h:672
void setVarArgsFrameIndex(int Index)
SmallVector< SDValue, 32 > OutVals
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:177
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1244
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
bool isTargetIOS() const
Definition: ARMSubtarget.h:701
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const unsigned char * bytes_begin() const
Definition: StringRef.h:126
Constant * getOrInsertGlobal(StringRef Name, Type *Ty, function_ref< GlobalVariable *()> CreateGlobalCallback)
Look up the specified global in the module symbol table.
Definition: Module.cpp:204
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:448
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2, return the log base 2 integer value.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:729
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:244
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
bool isStrongDefinitionForLinker() const
Returns true if this global&#39;s definition will be the one chosen by the linker.
Definition: GlobalValue.h:546
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:180
unsigned getLocMemOffset() const
ObjectFormatType getObjectFormat() const
getFormat - Get the object format for this triple.
Definition: Triple.h:327
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v...
bool isEHPad() const
Returns true if the block is a landing pad.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:207
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:182
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:220
virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physic...
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM...
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value...
Definition: APInt.h:486
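A self-contained sketch of the clamping behavior:
 #include "llvm/ADT/APInt.h"
 void limitDemo() {
   llvm::APInt Big(128, 12345);
   uint64_t Clamped = Big.getLimitedValue(100); // 100: the value exceeds the limit
   uint64_t Full = Big.getLimitedValue();       // 12345: default limit is UINT64_MAX
   (void)Clamped; (void)Full;
 }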
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:547
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable...
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:614
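A minimal sketch, assuming an LLVMContext &Ctx is in scope:
 llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
 llvm::VectorType *V4I32 = llvm::VectorType::get(I32, 4); // <4 x i32>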
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:672
PointerUnion< const Value *, const PseudoSourceValue * > ptrVal
TargetOptions Options
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return &#39;Legal&#39;) or we ...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1305
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
void setArgRegsSaveSize(unsigned s)
static MachineOperand CreateImm(int64_t Val)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:425
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
Flags getFlags() const
Return the raw flags of the source value,.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1233
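A self-contained sketch; abs only clears the sign bit, so it never rounds:
 #include "llvm/ADT/APFloat.h"
 void absDemo() {
   llvm::APFloat F(-2.5);
   llvm::APFloat A = llvm::abs(F); // A holds 2.5
 }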
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:538
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
bool hasLOB() const
Definition: ARMSubtarget.h:619
The memory access always returns the same value (or traps).
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool hasFPRegs() const
Definition: ARMSubtarget.h:587
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:274
uint32_t Size
Definition: Profile.cpp:46
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:342
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
Same for multiplication (the [SU]MULO overflow-aware multiply nodes).
Definition: ISDOpcodes.h:258
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
const SDValue & getBasePtr() const
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate, that is the target has add instructions which can add a register and the immediate without having to materialize the immediate into a register.
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:993
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2419
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
unsigned getOpcode() const
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:666
SDValue getValue(unsigned R) const
unsigned getInRegsParamsCount() const
bool hasV5TOps() const
Definition: ARMSubtarget.h:569
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that&#39;s previously inserted by insertSSPDeclarations, if any, otherwise return nul...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:382
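Since the check is constexpr, it also works in compile-time assertions; a small sketch:
 #include "llvm/Support/MathExtras.h"
 static_assert(llvm::isUInt<16>(65535), "largest unsigned 16-bit value fits");
 static_assert(!llvm::isUInt<16>(65536), "one past the top does not");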
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:353
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:109
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
unsigned getAlignment() const
Return the alignment of the access that is being performed FIXME: Remove this function once transitio...
Definition: Instructions.h:376
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:176
bool hasOptionalDef(QueryType Type=IgnoreBundle) const
Set if this instruction has an optional definition, e.g. ARM instructions which can set condition code if 's' bit is set.
Definition: MachineInstr.h:662
bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const override
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2091
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
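A minimal sketch (DAG and dl assumed in scope); the isTarget flag selects the TargetConstant flavor that instruction selection leaves untouched:
 SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
 SDValue Imm = DAG.getConstant(42, dl, MVT::i32, /*isTarget=*/true);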
SDValue getCondCode(ISD::CondCode Cond)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool hasMinSize() const
Definition: ARMSubtarget.h:766
bool isRegLoc() const
unsigned getPrefLoopLogAlignment() const
Definition: ARMSubtarget.h:878
static bool isGTorGE(ISD::CondCode CC)
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
bool isTargetMachO() const
Definition: ARMSubtarget.h:711
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:301
const MachinePointerInfo & getPointerInfo() const
void reset()
Definition: Optional.h:251
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:399
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if &#39;Op & Mask&#39; is known to be zero.
MachineConstantPoolValue * getMachineCPVal() const
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool hasFnAttribute(Attribute::AttrKind Kind) const
Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but may be faster. ...
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
bool hasAtomicStore() const
Return true if this atomic instruction stores to memory.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v...
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:87
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
This class is used to represent an MLOAD node.
void setReturnAddressIsTaken(bool s)
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
TypeSize getScalarSizeInBits() const
Definition: ValueTypes.h:304
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
CallLoweringInfo & setInRegister(bool Value=true)
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
ArrayRef< int > getMask() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:74
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:356
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:937
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
SDValue getRegister(unsigned Reg, EVT VT)
bool supportsTailCall() const
Definition: ARMSubtarget.h:797
unsigned getResNo() const
get the index which selects a specific result in the SDNode
Fast - This calling convention attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:42
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:514
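A self-contained sketch; this is the primitive behind low-mask pattern checks:
 #include "llvm/Support/MathExtras.h"
 void ctoDemo() {
   unsigned Ones = llvm::countTrailingOnes(0x7u); // 3: 0b111 has three low ones
   (void)Ones;
 }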
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
void push_back(MachineBasicBlock *MBB)
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
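A minimal sketch (DAG, dl, and i32 SDValues LHS, RHS, TrueV, FalseV assumed in scope):
 // One select_cc node computing: signed (LHS < RHS) ? TrueV : FalseV.
 SDValue Res = DAG.getSelectCC(dl, LHS, RHS, TrueV, FalseV, ISD::SETLT);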
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:279
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
SDValue getValueType(EVT)
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:199
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:837
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which...
Definition: ARMBaseInfo.h:284
bool hasMVEIntegerOps() const
Definition: ARMSubtarget.h:585
bool isUndef() const
Return true if the node is an UNDEF value.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
bool isRWPI() const
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target...
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1321
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:703
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
unsigned getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
void rewindByValRegsInfo()
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
bool isTargetWindows() const
Definition: ARMSubtarget.h:707
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock *> &DestBBs)
createJumpTableIndex - Create a new jump table.
static mvt_range fixedlen_vector_valuetypes()
Type * getElementType() const
Definition: DerivedTypes.h:399
const APFloat & getValueAPF() const
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:453
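A constexpr sketch of what qualifies as a shifted mask:
 #include "llvm/Support/MathExtras.h"
 static_assert(llvm::isShiftedMask_32(0x1C), "0b11100: one contiguous run");
 static_assert(!llvm::isShiftedMask_32(0x1A), "0b11010: the run has a hole");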
ISD::LoadExtType getExtensionType() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:54
static SDValue PerformORCombine_i1(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:504
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
isOSVersionLT - Helper function for doing comparisons against version numbers included in the target ...
Definition: Triple.h:415
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
Type * getArrayElementType() const
Definition: Type.h:368
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1653
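A self-contained sketch; unlike the MathExtras helpers, this honors the APInt's declared bit width:
 #include "llvm/ADT/APInt.h"
 void clzDemo() {
   llvm::APInt A(128, 1);               // 128-bit value with only bit 0 set
   unsigned LZ = A.countLeadingZeros(); // 127
   (void)LZ;
 }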
static BranchProbability getZero()
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2024
unsigned getNumOperands() const
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:199
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:633
Register getReg() const
getReg - Returns the register number.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:526
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
const SDValue & getOperand(unsigned i) const
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:850
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
uint64_t getZExtValue() const
bool isBigEndian() const
Definition: DataLayout.h:233
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:535
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:429
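A sketch of the dynamic-width variant, handy when the field width is a runtime value:
 #include "llvm/Support/MathExtras.h"
 void widthDemo() {
   bool Fits = llvm::isUIntN(12, 4095); // true: 4095 is the largest 12-bit value
   bool Over = llvm::isUIntN(12, 4096); // false
   (void)Fits; (void)Over;
 }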
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:549
bool isBitFieldInvertedMask(unsigned v)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
#define LLVM_DEBUG(X)
Definition: Debug.h:122
unsigned AllocateReg(unsigned Reg)
AllocateReg - Attempt to allocate one register.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass...
bool isAAPCS_ABI() const
bool hasDataBarrier() const
Definition: ARMSubtarget.h:628
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:431
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1234
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
ExpandBITCAST - If the target supports VFP, this function is called to expand a bit convert where eit...
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
bool isDoubleTy() const
Return true if this is &#39;double&#39;, a 64-bit IEEE fp type.
Definition: Type.h:150
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:640
VectorType * getType() const
Overload to return most specific vector type.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
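A minimal sketch, assuming an LLVMContext &Context; widths with no matching MVT yield extended (non-simple) EVTs:
 EVT I24 = EVT::getIntegerVT(Context, 24);
 // I24.isSimple() is false; type legalization widens such types (e.g. to i32).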
static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K, bool &usat)
Value * getPointerOperand()
Definition: Instructions.h:429
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:178
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:846
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:48
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:319
LLVMContext * getContext() const
Definition: SelectionDAG.h:430
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:429
static bool isSplatMask(const int *Mask, EVT VT)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck)
Type * getElementType() const
Definition: DerivedTypes.h:598
bool hasThumb2() const
Definition: ARMSubtarget.h:769
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::vector< MachineBasicBlock * >::iterator succ_iterator
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:243
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:153
IntegerType * Int32Ty
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:221
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
Definition: MCInstrDesc.h:623
This file describes how to lower LLVM code to machine code.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
FloatABI::ABIType FloatABIType
FloatABIType - This setting is set when the -float-abi=xxx option is specified on the command line...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use *> &Ops) const override
Check if sinking I&#39;s operands to I&#39;s basic block is profitable, because the operands can be folded in...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:985
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:441
bool hasFullFP16() const
Definition: ARMSubtarget.h:690
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
This class is used to represent ISD::LOAD nodes.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:681